{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 300, "global_step": 3821, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.3054830287206268e-08, "logits/chosen": -2.7604618072509766, "logits/rejected": -2.686812162399292, "logps/chosen": -516.73779296875, "logps/rejected": -458.60467529296875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.3054830287206266e-07, "logits/chosen": -2.4880800247192383, "logits/rejected": -2.4930832386016846, "logps/chosen": -338.7858581542969, "logps/rejected": -404.5611572265625, "loss": 0.6929, "rewards/accuracies": 0.4027777910232544, "rewards/chosen": 0.00024087271594908088, "rewards/margins": 0.0006852700607851148, "rewards/rejected": -0.00044439738849177957, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.610966057441253e-07, "logits/chosen": -2.475435733795166, "logits/rejected": -2.4197583198547363, "logps/chosen": -327.35919189453125, "logps/rejected": -443.83868408203125, "loss": 0.6931, "rewards/accuracies": 0.5, "rewards/chosen": -3.7859845178900287e-05, "rewards/margins": 1.1227629329368938e-05, "rewards/rejected": -4.9087520892499015e-05, "step": 20 }, { "epoch": 0.01, "learning_rate": 3.9164490861618804e-07, "logits/chosen": -2.5618598461151123, "logits/rejected": -2.5939595699310303, "logps/chosen": -348.56982421875, "logps/rejected": -416.8001403808594, "loss": 0.692, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.00023523520212620497, "rewards/margins": 0.0015929860528558493, "rewards/rejected": -0.0013577509671449661, "step": 30 }, { "epoch": 0.01, "learning_rate": 5.221932114882506e-07, "logits/chosen": -2.5246434211730957, "logits/rejected": -2.4987733364105225, "logps/chosen": -376.7454528808594, "logps/rejected": -427.66729736328125, "loss": 0.692, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0012155056465417147, "rewards/margins": 0.0037051704712212086, "rewards/rejected": -0.00492067588493228, "step": 40 }, { "epoch": 0.01, "learning_rate": 6.527415143603135e-07, "logits/chosen": -2.4959325790405273, "logits/rejected": -2.4452037811279297, "logps/chosen": -290.552001953125, "logps/rejected": -383.3431701660156, "loss": 0.69, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0027376641519367695, "rewards/margins": 0.005442826543003321, "rewards/rejected": -0.008180489763617516, "step": 50 }, { "epoch": 0.02, "learning_rate": 7.832898172323761e-07, "logits/chosen": -2.4295783042907715, "logits/rejected": -2.3901355266571045, "logps/chosen": -377.3544006347656, "logps/rejected": -410.72991943359375, "loss": 0.6909, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.006509931292384863, "rewards/margins": 0.004867006093263626, "rewards/rejected": -0.011376937851309776, "step": 60 }, { "epoch": 0.02, "learning_rate": 9.138381201044387e-07, "logits/chosen": -2.310166835784912, "logits/rejected": -2.279524803161621, "logps/chosen": -279.5904846191406, "logps/rejected": -370.0677795410156, "loss": 0.6889, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.007495372090488672, "rewards/margins": 0.007953675463795662, "rewards/rejected": -0.015449047088623047, "step": 70 }, { "epoch": 0.02, "learning_rate": 1.0443864229765013e-06, "logits/chosen": -2.36948561668396, "logits/rejected": -2.3835222721099854, "logps/chosen": -342.13653564453125, "logps/rejected": -447.1036682128906, "loss": 0.6878, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.010767060332000256, "rewards/margins": 0.013605493120849133, "rewards/rejected": -0.024372553452849388, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.1749347258485642e-06, "logits/chosen": -2.472949266433716, "logits/rejected": -2.3902525901794434, "logps/chosen": -325.2154541015625, "logps/rejected": -401.51751708984375, "loss": 0.6844, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.012460511177778244, "rewards/margins": 0.01636466197669506, "rewards/rejected": -0.028825175017118454, "step": 90 }, { "epoch": 0.03, "learning_rate": 1.305483028720627e-06, "logits/chosen": -2.5286309719085693, "logits/rejected": -2.5337207317352295, "logps/chosen": -365.7882080078125, "logps/rejected": -409.24261474609375, "loss": 0.6836, "rewards/accuracies": 0.75, "rewards/chosen": -0.017355522140860558, "rewards/margins": 0.019160564988851547, "rewards/rejected": -0.036516088992357254, "step": 100 }, { "epoch": 0.03, "learning_rate": 1.4360313315926894e-06, "logits/chosen": -2.485241651535034, "logits/rejected": -2.473548412322998, "logps/chosen": -337.1002197265625, "logps/rejected": -444.09832763671875, "loss": 0.6793, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.023945586755871773, "rewards/margins": 0.02508346177637577, "rewards/rejected": -0.049029044806957245, "step": 110 }, { "epoch": 0.03, "learning_rate": 1.5665796344647521e-06, "logits/chosen": -2.437514066696167, "logits/rejected": -2.439671516418457, "logps/chosen": -343.27777099609375, "logps/rejected": -444.11639404296875, "loss": 0.6747, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.028381770476698875, "rewards/margins": 0.039804860949516296, "rewards/rejected": -0.06818662583827972, "step": 120 }, { "epoch": 0.03, "learning_rate": 1.6971279373368146e-06, "logits/chosen": -2.4196202754974365, "logits/rejected": -2.329251527786255, "logps/chosen": -380.3607482910156, "logps/rejected": -435.93896484375, "loss": 0.6724, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.03702790290117264, "rewards/margins": 0.049176327884197235, "rewards/rejected": -0.08620421588420868, "step": 130 }, { "epoch": 0.04, "learning_rate": 1.8276762402088774e-06, "logits/chosen": -2.436796188354492, "logits/rejected": -2.4008259773254395, "logps/chosen": -364.23687744140625, "logps/rejected": -456.6722106933594, "loss": 0.6653, "rewards/accuracies": 0.8125, "rewards/chosen": -0.04911624267697334, "rewards/margins": 0.06083670258522034, "rewards/rejected": -0.10995294898748398, "step": 140 }, { "epoch": 0.04, "learning_rate": 1.9582245430809403e-06, "logits/chosen": -2.465663433074951, "logits/rejected": -2.4756152629852295, "logps/chosen": -344.59808349609375, "logps/rejected": -450.5936584472656, "loss": 0.6615, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.07244648039340973, "rewards/margins": 0.07592395693063736, "rewards/rejected": -0.1483704298734665, "step": 150 }, { "epoch": 0.04, "learning_rate": 2.0887728459530026e-06, "logits/chosen": -2.365227222442627, "logits/rejected": -2.3483099937438965, "logps/chosen": -365.65509033203125, "logps/rejected": -459.9405822753906, "loss": 0.6494, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.088950976729393, "rewards/margins": 0.0978541225194931, "rewards/rejected": -0.1868050992488861, "step": 160 }, { "epoch": 0.04, "learning_rate": 2.2193211488250653e-06, "logits/chosen": -2.5415358543395996, "logits/rejected": -2.4844305515289307, "logps/chosen": -417.45013427734375, "logps/rejected": -502.04888916015625, "loss": 0.6513, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.12002843618392944, "rewards/margins": 0.09641442447900772, "rewards/rejected": -0.21644285321235657, "step": 170 }, { "epoch": 0.05, "learning_rate": 2.3498694516971284e-06, "logits/chosen": -2.373419761657715, "logits/rejected": -2.328564167022705, "logps/chosen": -308.98297119140625, "logps/rejected": -423.2518005371094, "loss": 0.6334, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.10061755031347275, "rewards/margins": 0.14423246681690216, "rewards/rejected": -0.2448500096797943, "step": 180 }, { "epoch": 0.05, "learning_rate": 2.4804177545691907e-06, "logits/chosen": -2.4381210803985596, "logits/rejected": -2.4739632606506348, "logps/chosen": -366.0082092285156, "logps/rejected": -444.33233642578125, "loss": 0.6237, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.15544722974300385, "rewards/margins": 0.15099851787090302, "rewards/rejected": -0.30644577741622925, "step": 190 }, { "epoch": 0.05, "learning_rate": 2.610966057441254e-06, "logits/chosen": -2.3546931743621826, "logits/rejected": -2.3614845275878906, "logps/chosen": -413.4657287597656, "logps/rejected": -487.9469299316406, "loss": 0.6049, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21883895993232727, "rewards/margins": 0.22747401893138885, "rewards/rejected": -0.44631296396255493, "step": 200 }, { "epoch": 0.05, "learning_rate": 2.741514360313316e-06, "logits/chosen": -2.3287081718444824, "logits/rejected": -2.2180962562561035, "logps/chosen": -397.4319763183594, "logps/rejected": -475.10614013671875, "loss": 0.6212, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2562285363674164, "rewards/margins": 0.17265436053276062, "rewards/rejected": -0.4288829267024994, "step": 210 }, { "epoch": 0.06, "learning_rate": 2.872062663185379e-06, "logits/chosen": -2.3937861919403076, "logits/rejected": -2.323655366897583, "logps/chosen": -393.7010498046875, "logps/rejected": -540.32958984375, "loss": 0.5708, "rewards/accuracies": 0.8125, "rewards/chosen": -0.31672877073287964, "rewards/margins": 0.32215583324432373, "rewards/rejected": -0.6388846039772034, "step": 220 }, { "epoch": 0.06, "learning_rate": 3.0026109660574416e-06, "logits/chosen": -2.2199885845184326, "logits/rejected": -2.1488921642303467, "logps/chosen": -362.84710693359375, "logps/rejected": -462.92755126953125, "loss": 0.57, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.4027708172798157, "rewards/margins": 0.28824615478515625, "rewards/rejected": -0.6910169124603271, "step": 230 }, { "epoch": 0.06, "learning_rate": 3.1331592689295043e-06, "logits/chosen": -2.1226742267608643, "logits/rejected": -2.128727674484253, "logps/chosen": -443.38226318359375, "logps/rejected": -544.7005615234375, "loss": 0.5704, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.5918055176734924, "rewards/margins": 0.33963826298713684, "rewards/rejected": -0.9314438104629517, "step": 240 }, { "epoch": 0.07, "learning_rate": 3.263707571801567e-06, "logits/chosen": -2.2666714191436768, "logits/rejected": -2.192073106765747, "logps/chosen": -407.093505859375, "logps/rejected": -539.2747802734375, "loss": 0.53, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.6421019434928894, "rewards/margins": 0.4279584288597107, "rewards/rejected": -1.0700603723526, "step": 250 }, { "epoch": 0.07, "learning_rate": 3.3942558746736293e-06, "logits/chosen": -2.1499412059783936, "logits/rejected": -2.0672426223754883, "logps/chosen": -427.7845764160156, "logps/rejected": -526.4454956054688, "loss": 0.5201, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.7411731481552124, "rewards/margins": 0.33993035554885864, "rewards/rejected": -1.0811034440994263, "step": 260 }, { "epoch": 0.07, "learning_rate": 3.524804177545692e-06, "logits/chosen": -2.0998997688293457, "logits/rejected": -2.022254467010498, "logps/chosen": -459.3646545410156, "logps/rejected": -625.1860961914062, "loss": 0.5132, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.9105283617973328, "rewards/margins": 0.5623170137405396, "rewards/rejected": -1.472845435142517, "step": 270 }, { "epoch": 0.07, "learning_rate": 3.6553524804177547e-06, "logits/chosen": -2.058182716369629, "logits/rejected": -1.990330457687378, "logps/chosen": -422.2322692871094, "logps/rejected": -509.48504638671875, "loss": 0.5447, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.8607247471809387, "rewards/margins": 0.5177969932556152, "rewards/rejected": -1.3785216808319092, "step": 280 }, { "epoch": 0.08, "learning_rate": 3.7859007832898174e-06, "logits/chosen": -1.908735990524292, "logits/rejected": -1.7987762689590454, "logps/chosen": -532.5482788085938, "logps/rejected": -675.1096801757812, "loss": 0.4906, "rewards/accuracies": 0.75, "rewards/chosen": -1.1365267038345337, "rewards/margins": 0.7340434789657593, "rewards/rejected": -1.870570421218872, "step": 290 }, { "epoch": 0.08, "learning_rate": 3.9164490861618806e-06, "logits/chosen": -1.999489188194275, "logits/rejected": -1.8826462030410767, "logps/chosen": -510.0675354003906, "logps/rejected": -654.4682006835938, "loss": 0.4906, "rewards/accuracies": 0.75, "rewards/chosen": -1.1803276538848877, "rewards/margins": 0.692645788192749, "rewards/rejected": -1.8729734420776367, "step": 300 }, { "epoch": 0.08, "eval_logits/chosen": -1.7535996437072754, "eval_logits/rejected": -1.6233811378479004, "eval_logps/chosen": -473.8013610839844, "eval_logps/rejected": -603.2532958984375, "eval_loss": 0.5339562892913818, "eval_rewards/accuracies": 0.7310000061988831, "eval_rewards/chosen": -1.181405782699585, "eval_rewards/margins": 0.661102831363678, "eval_rewards/rejected": -1.8425085544586182, "eval_runtime": 1389.4464, "eval_samples_per_second": 1.439, "eval_steps_per_second": 0.36, "step": 300 }, { "epoch": 0.08, "learning_rate": 4.046997389033943e-06, "logits/chosen": -1.8269140720367432, "logits/rejected": -1.690843939781189, "logps/chosen": -498.0406799316406, "logps/rejected": -633.5381469726562, "loss": 0.5566, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.2140557765960693, "rewards/margins": 0.597601592540741, "rewards/rejected": -1.811657190322876, "step": 310 }, { "epoch": 0.08, "learning_rate": 4.177545691906005e-06, "logits/chosen": -1.767525315284729, "logits/rejected": -1.5722942352294922, "logps/chosen": -499.41839599609375, "logps/rejected": -658.9512329101562, "loss": 0.5073, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2438334226608276, "rewards/margins": 0.7738653421401978, "rewards/rejected": -2.0176987648010254, "step": 320 }, { "epoch": 0.09, "learning_rate": 4.308093994778068e-06, "logits/chosen": -1.8579235076904297, "logits/rejected": -1.7731454372406006, "logps/chosen": -443.87689208984375, "logps/rejected": -572.9539184570312, "loss": 0.563, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.1261751651763916, "rewards/margins": 0.6167136430740356, "rewards/rejected": -1.7428886890411377, "step": 330 }, { "epoch": 0.09, "learning_rate": 4.4386422976501306e-06, "logits/chosen": -1.7547829151153564, "logits/rejected": -1.6789696216583252, "logps/chosen": -377.6453552246094, "logps/rejected": -584.998779296875, "loss": 0.4792, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9642307162284851, "rewards/margins": 0.8439178466796875, "rewards/rejected": -1.8081486225128174, "step": 340 }, { "epoch": 0.09, "learning_rate": 4.569190600522193e-06, "logits/chosen": -1.6814041137695312, "logits/rejected": -1.4768749475479126, "logps/chosen": -506.594482421875, "logps/rejected": -591.21142578125, "loss": 0.5415, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.2887766361236572, "rewards/margins": 0.5483629107475281, "rewards/rejected": -1.8371394872665405, "step": 350 }, { "epoch": 0.09, "learning_rate": 4.699738903394257e-06, "logits/chosen": -1.6767174005508423, "logits/rejected": -1.5448577404022217, "logps/chosen": -424.47357177734375, "logps/rejected": -592.1474609375, "loss": 0.4852, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.9117814302444458, "rewards/margins": 0.8897954225540161, "rewards/rejected": -1.801576852798462, "step": 360 }, { "epoch": 0.1, "learning_rate": 4.8302872062663196e-06, "logits/chosen": -1.7710018157958984, "logits/rejected": -1.6090304851531982, "logps/chosen": -496.84796142578125, "logps/rejected": -633.0993041992188, "loss": 0.5396, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.243511438369751, "rewards/margins": 0.702240526676178, "rewards/rejected": -1.9457519054412842, "step": 370 }, { "epoch": 0.1, "learning_rate": 4.9608355091383814e-06, "logits/chosen": -1.4778488874435425, "logits/rejected": -1.3310272693634033, "logps/chosen": -402.7732849121094, "logps/rejected": -492.5380859375, "loss": 0.5017, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.9786807298660278, "rewards/margins": 0.623943567276001, "rewards/rejected": -1.6026241779327393, "step": 380 }, { "epoch": 0.1, "learning_rate": 4.9999488562447675e-06, "logits/chosen": -1.40001380443573, "logits/rejected": -1.2227522134780884, "logps/chosen": -532.0943603515625, "logps/rejected": -664.1284790039062, "loss": 0.6002, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.533832311630249, "rewards/margins": 0.5622987747192383, "rewards/rejected": -2.0961310863494873, "step": 390 }, { "epoch": 0.1, "learning_rate": 4.999698361256577e-06, "logits/chosen": -1.461018443107605, "logits/rejected": -1.1940746307373047, "logps/chosen": -509.679443359375, "logps/rejected": -636.8277587890625, "loss": 0.5079, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1675481796264648, "rewards/margins": 0.7109408974647522, "rewards/rejected": -1.8784888982772827, "step": 400 }, { "epoch": 0.11, "learning_rate": 4.999239142174581e-06, "logits/chosen": -1.3779613971710205, "logits/rejected": -1.1279867887496948, "logps/chosen": -455.8907165527344, "logps/rejected": -604.815185546875, "loss": 0.4635, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2613023519515991, "rewards/margins": 0.855317234992981, "rewards/rejected": -2.11661958694458, "step": 410 }, { "epoch": 0.11, "learning_rate": 4.99857123734344e-06, "logits/chosen": -1.3608977794647217, "logits/rejected": -0.901921272277832, "logps/chosen": -503.4825134277344, "logps/rejected": -629.4042358398438, "loss": 0.4903, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.280937671661377, "rewards/margins": 0.8498051762580872, "rewards/rejected": -2.1307430267333984, "step": 420 }, { "epoch": 0.11, "learning_rate": 4.997694702533016e-06, "logits/chosen": -1.29677414894104, "logits/rejected": -1.0692778825759888, "logps/chosen": -548.71826171875, "logps/rejected": -724.1248779296875, "loss": 0.466, "rewards/accuracies": 0.875, "rewards/chosen": -1.5205175876617432, "rewards/margins": 0.9235566854476929, "rewards/rejected": -2.4440743923187256, "step": 430 }, { "epoch": 0.12, "learning_rate": 4.996609610933713e-06, "logits/chosen": -1.3901933431625366, "logits/rejected": -1.1403733491897583, "logps/chosen": -505.74139404296875, "logps/rejected": -676.5526123046875, "loss": 0.5209, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.319435715675354, "rewards/margins": 0.8349205255508423, "rewards/rejected": -2.1543564796447754, "step": 440 }, { "epoch": 0.12, "learning_rate": 4.995316053150366e-06, "logits/chosen": -1.3645076751708984, "logits/rejected": -1.0972566604614258, "logps/chosen": -509.98016357421875, "logps/rejected": -682.1041259765625, "loss": 0.5307, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.315499186515808, "rewards/margins": 0.8478175401687622, "rewards/rejected": -2.163316488265991, "step": 450 }, { "epoch": 0.12, "learning_rate": 4.9938141371946815e-06, "logits/chosen": -1.3621938228607178, "logits/rejected": -0.9139550924301147, "logps/chosen": -506.2652893066406, "logps/rejected": -685.5975341796875, "loss": 0.4647, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.2151587009429932, "rewards/margins": 0.93292635679245, "rewards/rejected": -2.148085117340088, "step": 460 }, { "epoch": 0.12, "learning_rate": 4.992103988476206e-06, "logits/chosen": -1.175817847251892, "logits/rejected": -0.6028262376785278, "logps/chosen": -476.334716796875, "logps/rejected": -651.3397216796875, "loss": 0.4581, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.2715951204299927, "rewards/margins": 0.8561094999313354, "rewards/rejected": -2.127704381942749, "step": 470 }, { "epoch": 0.13, "learning_rate": 4.990185749791866e-06, "logits/chosen": -0.8731945753097534, "logits/rejected": -0.5909063220024109, "logps/chosen": -551.8997802734375, "logps/rejected": -691.8196411132812, "loss": 0.4837, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4918056726455688, "rewards/margins": 1.0931782722473145, "rewards/rejected": -2.5849835872650146, "step": 480 }, { "epoch": 0.13, "learning_rate": 4.9880595813140395e-06, "logits/chosen": -0.8462217450141907, "logits/rejected": -0.4468405246734619, "logps/chosen": -490.5753479003906, "logps/rejected": -612.1764526367188, "loss": 0.4908, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.316733956336975, "rewards/margins": 0.7920147180557251, "rewards/rejected": -2.108748435974121, "step": 490 }, { "epoch": 0.13, "learning_rate": 4.985725660577184e-06, "logits/chosen": -0.6493757963180542, "logits/rejected": -0.6190133690834045, "logps/chosen": -496.7198181152344, "logps/rejected": -738.5999145507812, "loss": 0.4759, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.4519720077514648, "rewards/margins": 1.00557541847229, "rewards/rejected": -2.457547664642334, "step": 500 }, { "epoch": 0.13, "learning_rate": 4.983184182463009e-06, "logits/chosen": -0.6732873916625977, "logits/rejected": -0.4988276958465576, "logps/chosen": -510.90106201171875, "logps/rejected": -625.2664794921875, "loss": 0.4795, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.3072543144226074, "rewards/margins": 0.8223368525505066, "rewards/rejected": -2.1295909881591797, "step": 510 }, { "epoch": 0.14, "learning_rate": 4.980435359184203e-06, "logits/chosen": -1.0567286014556885, "logits/rejected": -0.561193585395813, "logps/chosen": -564.832275390625, "logps/rejected": -718.8677368164062, "loss": 0.5194, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5574848651885986, "rewards/margins": 0.938727855682373, "rewards/rejected": -2.496212959289551, "step": 520 }, { "epoch": 0.14, "learning_rate": 4.9774794202667236e-06, "logits/chosen": -0.9730321168899536, "logits/rejected": -0.3585650324821472, "logps/chosen": -440.9288024902344, "logps/rejected": -633.592041015625, "loss": 0.3516, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.1980558633804321, "rewards/margins": 1.1983206272125244, "rewards/rejected": -2.396376132965088, "step": 530 }, { "epoch": 0.14, "learning_rate": 4.974316612530615e-06, "logits/chosen": -0.5258805751800537, "logits/rejected": -0.13965483009815216, "logps/chosen": -500.12091064453125, "logps/rejected": -707.7438354492188, "loss": 0.4002, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.656359314918518, "rewards/margins": 1.194873571395874, "rewards/rejected": -2.8512330055236816, "step": 540 }, { "epoch": 0.14, "learning_rate": 4.970947200069416e-06, "logits/chosen": -0.8342685699462891, "logits/rejected": 0.23774346709251404, "logps/chosen": -462.1539611816406, "logps/rejected": -631.550537109375, "loss": 0.5032, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.5009446144104004, "rewards/margins": 1.0611143112182617, "rewards/rejected": -2.562058925628662, "step": 550 }, { "epoch": 0.15, "learning_rate": 4.967371464228096e-06, "logits/chosen": -0.44802480936050415, "logits/rejected": -0.0694938451051712, "logps/chosen": -492.4813537597656, "logps/rejected": -663.474365234375, "loss": 0.4973, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.5952883958816528, "rewards/margins": 1.0112850666046143, "rewards/rejected": -2.6065733432769775, "step": 560 }, { "epoch": 0.15, "learning_rate": 4.963589703579569e-06, "logits/chosen": -0.5677907466888428, "logits/rejected": 0.08190581947565079, "logps/chosen": -503.6414489746094, "logps/rejected": -645.163330078125, "loss": 0.4839, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.4353834390640259, "rewards/margins": 0.922812819480896, "rewards/rejected": -2.358196496963501, "step": 570 }, { "epoch": 0.15, "learning_rate": 4.9596022338997615e-06, "logits/chosen": -0.5412989854812622, "logits/rejected": 0.47167086601257324, "logps/chosen": -497.56103515625, "logps/rejected": -681.288818359375, "loss": 0.4859, "rewards/accuracies": 0.75, "rewards/chosen": -1.6671890020370483, "rewards/margins": 1.1972028017044067, "rewards/rejected": -2.864391803741455, "step": 580 }, { "epoch": 0.15, "learning_rate": 4.955409388141243e-06, "logits/chosen": -0.8228802680969238, "logits/rejected": 0.08221787214279175, "logps/chosen": -544.5670776367188, "logps/rejected": -733.0662231445312, "loss": 0.5465, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.6897475719451904, "rewards/margins": 1.3086159229278564, "rewards/rejected": -2.998363971710205, "step": 590 }, { "epoch": 0.16, "learning_rate": 4.951011516405429e-06, "logits/chosen": -0.3682901859283447, "logits/rejected": 0.21380114555358887, "logps/chosen": -472.1390686035156, "logps/rejected": -650.9225463867188, "loss": 0.4794, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.3374059200286865, "rewards/margins": 0.9791024923324585, "rewards/rejected": -2.3165085315704346, "step": 600 }, { "epoch": 0.16, "eval_logits/chosen": 0.4449966549873352, "eval_logits/rejected": 1.2460291385650635, "eval_logps/chosen": -494.4773254394531, "eval_logps/rejected": -666.9945068359375, "eval_loss": 0.4700670540332794, "eval_rewards/accuracies": 0.7699999809265137, "eval_rewards/chosen": -1.38816499710083, "eval_rewards/margins": 1.0917549133300781, "eval_rewards/rejected": -2.479919910430908, "eval_runtime": 1372.1838, "eval_samples_per_second": 1.458, "eval_steps_per_second": 0.364, "step": 600 }, { "epoch": 0.16, "learning_rate": 4.946408985913344e-06, "logits/chosen": -1.1787288188934326, "logits/rejected": 0.032404374331235886, "logps/chosen": -540.3707275390625, "logps/rejected": -690.0636596679688, "loss": 0.4104, "rewards/accuracies": 0.875, "rewards/chosen": -1.2028136253356934, "rewards/margins": 1.3079878091812134, "rewards/rejected": -2.510801315307617, "step": 610 }, { "epoch": 0.16, "learning_rate": 4.941602180974958e-06, "logits/chosen": -0.5976434946060181, "logits/rejected": 0.23231466114521027, "logps/chosen": -513.5987548828125, "logps/rejected": -683.162353515625, "loss": 0.4517, "rewards/accuracies": 0.75, "rewards/chosen": -1.503683090209961, "rewards/margins": 1.2033047676086426, "rewards/rejected": -2.7069880962371826, "step": 620 }, { "epoch": 0.16, "learning_rate": 4.936591502957101e-06, "logits/chosen": -0.6624077558517456, "logits/rejected": 0.3608100712299347, "logps/chosen": -447.55230712890625, "logps/rejected": -660.681640625, "loss": 0.3989, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.3714964389801025, "rewards/margins": 1.2194106578826904, "rewards/rejected": -2.590907335281372, "step": 630 }, { "epoch": 0.17, "learning_rate": 4.931377370249946e-06, "logits/chosen": -0.27748388051986694, "logits/rejected": 0.2729637026786804, "logps/chosen": -528.355224609375, "logps/rejected": -733.2789916992188, "loss": 0.4943, "rewards/accuracies": 0.75, "rewards/chosen": -1.8842767477035522, "rewards/margins": 1.311034917831421, "rewards/rejected": -3.1953113079071045, "step": 640 }, { "epoch": 0.17, "learning_rate": 4.925960218232073e-06, "logits/chosen": -0.4966405928134918, "logits/rejected": 0.048351895064115524, "logps/chosen": -491.5657653808594, "logps/rejected": -743.1395874023438, "loss": 0.4957, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.6836191415786743, "rewards/margins": 1.473381519317627, "rewards/rejected": -3.157000780105591, "step": 650 }, { "epoch": 0.17, "learning_rate": 4.920340499234116e-06, "logits/chosen": -0.24024248123168945, "logits/rejected": -0.06672336161136627, "logps/chosen": -472.2447814941406, "logps/rejected": -663.1290893554688, "loss": 0.4382, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.4003952741622925, "rewards/margins": 1.0303130149841309, "rewards/rejected": -2.430708408355713, "step": 660 }, { "epoch": 0.18, "learning_rate": 4.914518682500995e-06, "logits/chosen": -0.8933698534965515, "logits/rejected": -0.014977499842643738, "logps/chosen": -509.6695251464844, "logps/rejected": -674.0376586914062, "loss": 0.4931, "rewards/accuracies": 0.75, "rewards/chosen": -1.4237163066864014, "rewards/margins": 1.024255394935608, "rewards/rejected": -2.4479715824127197, "step": 670 }, { "epoch": 0.18, "learning_rate": 4.9084952541527315e-06, "logits/chosen": -0.5897430181503296, "logits/rejected": 0.24839851260185242, "logps/chosen": -533.2153930664062, "logps/rejected": -633.3540649414062, "loss": 0.4995, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.5195014476776123, "rewards/margins": 0.7836617827415466, "rewards/rejected": -2.3031630516052246, "step": 680 }, { "epoch": 0.18, "learning_rate": 4.902270717143858e-06, "logits/chosen": -0.20460684597492218, "logits/rejected": 0.12876734137535095, "logps/chosen": -454.1747131347656, "logps/rejected": -706.5838623046875, "loss": 0.3716, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.4049168825149536, "rewards/margins": 1.3669251203536987, "rewards/rejected": -2.7718420028686523, "step": 690 }, { "epoch": 0.18, "learning_rate": 4.895845591221427e-06, "logits/chosen": -0.5709416270256042, "logits/rejected": 0.21547503769397736, "logps/chosen": -543.8369750976562, "logps/rejected": -720.9224853515625, "loss": 0.4566, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.6587600708007812, "rewards/margins": 1.1786607503890991, "rewards/rejected": -2.83742094039917, "step": 700 }, { "epoch": 0.19, "learning_rate": 4.8892204128816e-06, "logits/chosen": -0.27632415294647217, "logits/rejected": -0.12886568903923035, "logps/chosen": -468.2186584472656, "logps/rejected": -706.5438232421875, "loss": 0.4874, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.4443271160125732, "rewards/margins": 1.2774336338043213, "rewards/rejected": -2.7217605113983154, "step": 710 }, { "epoch": 0.19, "learning_rate": 4.882395735324864e-06, "logits/chosen": -0.5171593427658081, "logits/rejected": 0.06492243707180023, "logps/chosen": -399.1227111816406, "logps/rejected": -646.8035888671875, "loss": 0.4334, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.1794382333755493, "rewards/margins": 1.383467435836792, "rewards/rejected": -2.5629055500030518, "step": 720 }, { "epoch": 0.19, "learning_rate": 4.87537212840983e-06, "logits/chosen": -0.38013777136802673, "logits/rejected": 0.17461785674095154, "logps/chosen": -431.56939697265625, "logps/rejected": -727.7451171875, "loss": 0.403, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.2866822481155396, "rewards/margins": 1.4361357688903809, "rewards/rejected": -2.722817897796631, "step": 730 }, { "epoch": 0.19, "learning_rate": 4.8681501786056545e-06, "logits/chosen": -0.4910075068473816, "logits/rejected": 0.1193656325340271, "logps/chosen": -524.5296630859375, "logps/rejected": -716.439697265625, "loss": 0.5003, "rewards/accuracies": 0.75, "rewards/chosen": -1.4974342584609985, "rewards/margins": 1.3101706504821777, "rewards/rejected": -2.8076047897338867, "step": 740 }, { "epoch": 0.2, "learning_rate": 4.860730488943068e-06, "logits/chosen": -0.4912436902523041, "logits/rejected": 0.18202224373817444, "logps/chosen": -527.1237182617188, "logps/rejected": -659.1048583984375, "loss": 0.4865, "rewards/accuracies": 0.75, "rewards/chosen": -1.5281684398651123, "rewards/margins": 0.9601262211799622, "rewards/rejected": -2.4882943630218506, "step": 750 }, { "epoch": 0.2, "learning_rate": 4.853113678964022e-06, "logits/chosen": -0.8603304624557495, "logits/rejected": -0.08993472903966904, "logps/chosen": -432.42279052734375, "logps/rejected": -642.409912109375, "loss": 0.3753, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.2163296937942505, "rewards/margins": 1.3121674060821533, "rewards/rejected": -2.5284969806671143, "step": 760 }, { "epoch": 0.2, "learning_rate": 4.845300384669958e-06, "logits/chosen": 0.24725647270679474, "logits/rejected": 0.0013871907722204924, "logps/chosen": -527.3872680664062, "logps/rejected": -742.9652099609375, "loss": 0.4498, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.8072938919067383, "rewards/margins": 1.2479110956192017, "rewards/rejected": -3.0552048683166504, "step": 770 }, { "epoch": 0.2, "learning_rate": 4.837291258468701e-06, "logits/chosen": -0.6004719734191895, "logits/rejected": 0.5962368249893188, "logps/chosen": -507.5689392089844, "logps/rejected": -673.4093627929688, "loss": 0.4537, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.6435458660125732, "rewards/margins": 1.2729610204696655, "rewards/rejected": -2.9165070056915283, "step": 780 }, { "epoch": 0.21, "learning_rate": 4.829086969119984e-06, "logits/chosen": -0.4771009385585785, "logits/rejected": 0.03888826444745064, "logps/chosen": -551.6219482421875, "logps/rejected": -797.9405517578125, "loss": 0.439, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.8441495895385742, "rewards/margins": 1.5053937435150146, "rewards/rejected": -3.349543333053589, "step": 790 }, { "epoch": 0.21, "learning_rate": 4.820688201679605e-06, "logits/chosen": -0.36556169390678406, "logits/rejected": 0.3322374224662781, "logps/chosen": -510.71661376953125, "logps/rejected": -704.6385498046875, "loss": 0.4447, "rewards/accuracies": 0.875, "rewards/chosen": -1.6285368204116821, "rewards/margins": 1.4855858087539673, "rewards/rejected": -3.1141226291656494, "step": 800 }, { "epoch": 0.21, "learning_rate": 4.8120956574422315e-06, "logits/chosen": -0.3884666860103607, "logits/rejected": -0.13175992667675018, "logps/chosen": -518.641357421875, "logps/rejected": -642.6422119140625, "loss": 0.5053, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.5760186910629272, "rewards/margins": 0.9459335207939148, "rewards/rejected": -2.5219521522521973, "step": 810 }, { "epoch": 0.21, "learning_rate": 4.803310053882831e-06, "logits/chosen": -1.0650156736373901, "logits/rejected": 0.38095536828041077, "logps/chosen": -542.5806274414062, "logps/rejected": -690.2403564453125, "loss": 0.4374, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.6415122747421265, "rewards/margins": 1.045480728149414, "rewards/rejected": -2.686992883682251, "step": 820 }, { "epoch": 0.22, "learning_rate": 4.794332124596775e-06, "logits/chosen": -0.6507046222686768, "logits/rejected": 0.048465847969055176, "logps/chosen": -479.3224182128906, "logps/rejected": -674.3448486328125, "loss": 0.401, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.3757288455963135, "rewards/margins": 1.2602720260620117, "rewards/rejected": -2.636000633239746, "step": 830 }, { "epoch": 0.22, "learning_rate": 4.785162619238575e-06, "logits/chosen": -0.6413692235946655, "logits/rejected": -0.042513225227594376, "logps/chosen": -557.0155029296875, "logps/rejected": -792.69970703125, "loss": 0.4201, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.6387875080108643, "rewards/margins": 1.4760812520980835, "rewards/rejected": -3.1148688793182373, "step": 840 }, { "epoch": 0.22, "learning_rate": 4.775802303459288e-06, "logits/chosen": -0.74284428358078, "logits/rejected": -0.1755208522081375, "logps/chosen": -588.0230712890625, "logps/rejected": -802.0186767578125, "loss": 0.4069, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.7381775379180908, "rewards/margins": 1.4629442691802979, "rewards/rejected": -3.2011218070983887, "step": 850 }, { "epoch": 0.23, "learning_rate": 4.766251958842589e-06, "logits/chosen": -0.42949801683425903, "logits/rejected": -0.4735488295555115, "logps/chosen": -518.86572265625, "logps/rejected": -755.33154296875, "loss": 0.4045, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.6570861339569092, "rewards/margins": 1.4493482112884521, "rewards/rejected": -3.1064348220825195, "step": 860 }, { "epoch": 0.23, "learning_rate": 4.7565123828395066e-06, "logits/chosen": -0.7870966196060181, "logits/rejected": 0.0325060598552227, "logps/chosen": -515.7349243164062, "logps/rejected": -726.00927734375, "loss": 0.5102, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.6367769241333008, "rewards/margins": 1.335331678390503, "rewards/rejected": -2.9721086025238037, "step": 870 }, { "epoch": 0.23, "learning_rate": 4.746584388701831e-06, "logits/chosen": -0.7208787202835083, "logits/rejected": -0.034571003168821335, "logps/chosen": -488.56341552734375, "logps/rejected": -712.2532958984375, "loss": 0.5118, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.7884547710418701, "rewards/margins": 1.3315757513046265, "rewards/rejected": -3.120030164718628, "step": 880 }, { "epoch": 0.23, "learning_rate": 4.736468805414218e-06, "logits/chosen": -0.7053539752960205, "logits/rejected": -0.015096127986907959, "logps/chosen": -474.1915588378906, "logps/rejected": -634.8294677734375, "loss": 0.4931, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4765193462371826, "rewards/margins": 1.0172039270401, "rewards/rejected": -2.4937233924865723, "step": 890 }, { "epoch": 0.24, "learning_rate": 4.7261664776249595e-06, "logits/chosen": -1.0484793186187744, "logits/rejected": -0.35419899225234985, "logps/chosen": -551.4525756835938, "logps/rejected": -721.5409545898438, "loss": 0.4519, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.49507737159729, "rewards/margins": 1.2039979696273804, "rewards/rejected": -2.699075222015381, "step": 900 }, { "epoch": 0.24, "eval_logits/chosen": 0.19788537919521332, "eval_logits/rejected": 1.0802806615829468, "eval_logps/chosen": -498.0536804199219, "eval_logps/rejected": -686.2431030273438, "eval_loss": 0.456636905670166, "eval_rewards/accuracies": 0.7730000019073486, "eval_rewards/chosen": -1.42392897605896, "eval_rewards/margins": 1.248477816581726, "eval_rewards/rejected": -2.6724064350128174, "eval_runtime": 1384.9339, "eval_samples_per_second": 1.444, "eval_steps_per_second": 0.361, "step": 900 }, { "epoch": 0.24, "learning_rate": 4.715678265575463e-06, "logits/chosen": -0.890539824962616, "logits/rejected": 0.22412686049938202, "logps/chosen": -490.0135803222656, "logps/rejected": -712.4631958007812, "loss": 0.3771, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.2612826824188232, "rewards/margins": 1.602736234664917, "rewards/rejected": -2.8640189170837402, "step": 910 }, { "epoch": 0.24, "learning_rate": 4.705005045028415e-06, "logits/chosen": -0.8860180974006653, "logits/rejected": 0.03514351695775986, "logps/chosen": -594.3389282226562, "logps/rejected": -782.8358154296875, "loss": 0.4195, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.7765058279037476, "rewards/margins": 1.2403103113174438, "rewards/rejected": -3.0168161392211914, "step": 920 }, { "epoch": 0.24, "learning_rate": 4.694147707194659e-06, "logits/chosen": -0.5554194450378418, "logits/rejected": 0.016318077221512794, "logps/chosen": -585.2276000976562, "logps/rejected": -805.9215087890625, "loss": 0.3769, "rewards/accuracies": 0.75, "rewards/chosen": -1.8951377868652344, "rewards/margins": 1.719668984413147, "rewards/rejected": -3.61480712890625, "step": 930 }, { "epoch": 0.25, "learning_rate": 4.683107158658782e-06, "logits/chosen": -0.7583510875701904, "logits/rejected": 0.25492575764656067, "logps/chosen": -562.8099975585938, "logps/rejected": -745.3627319335938, "loss": 0.5012, "rewards/accuracies": 0.8125, "rewards/chosen": -1.83584725856781, "rewards/margins": 1.2255539894104004, "rewards/rejected": -3.061401128768921, "step": 940 }, { "epoch": 0.25, "learning_rate": 4.671884321303407e-06, "logits/chosen": -1.1498582363128662, "logits/rejected": 0.7587814331054688, "logps/chosen": -591.7152099609375, "logps/rejected": -751.0265502929688, "loss": 0.4275, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.7667793035507202, "rewards/margins": 1.4644314050674438, "rewards/rejected": -3.231210231781006, "step": 950 }, { "epoch": 0.25, "learning_rate": 4.660480132232224e-06, "logits/chosen": -0.7957364320755005, "logits/rejected": -0.17932990193367004, "logps/chosen": -428.00958251953125, "logps/rejected": -722.1585083007812, "loss": 0.3945, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.4679275751113892, "rewards/margins": 1.536849021911621, "rewards/rejected": -3.0047767162323, "step": 960 }, { "epoch": 0.25, "learning_rate": 4.6488955436917414e-06, "logits/chosen": -0.7032185196876526, "logits/rejected": 0.244097039103508, "logps/chosen": -511.335693359375, "logps/rejected": -830.0217895507812, "loss": 0.4831, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.6884835958480835, "rewards/margins": 1.69620680809021, "rewards/rejected": -3.384690761566162, "step": 970 }, { "epoch": 0.26, "learning_rate": 4.6371315229917644e-06, "logits/chosen": -0.9565087556838989, "logits/rejected": -0.6389614343643188, "logps/chosen": -467.09814453125, "logps/rejected": -740.5447998046875, "loss": 0.4217, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.331072449684143, "rewards/margins": 1.5709624290466309, "rewards/rejected": -2.9020345211029053, "step": 980 }, { "epoch": 0.26, "learning_rate": 4.625189052424638e-06, "logits/chosen": -0.9426406621932983, "logits/rejected": -0.6099969148635864, "logps/chosen": -464.8932189941406, "logps/rejected": -654.5234985351562, "loss": 0.457, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.2798701524734497, "rewards/margins": 1.0147043466567993, "rewards/rejected": -2.29457426071167, "step": 990 }, { "epoch": 0.26, "learning_rate": 4.613069129183218e-06, "logits/chosen": -0.9859519004821777, "logits/rejected": -0.3881329894065857, "logps/chosen": -416.37750244140625, "logps/rejected": -661.3399658203125, "loss": 0.4259, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.1251696348190308, "rewards/margins": 1.4806818962097168, "rewards/rejected": -2.605851650238037, "step": 1000 }, { "epoch": 0.26, "learning_rate": 4.600772765277607e-06, "logits/chosen": -0.8805161714553833, "logits/rejected": -0.5182097554206848, "logps/chosen": -533.8609008789062, "logps/rejected": -738.94921875, "loss": 0.4221, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4609010219573975, "rewards/margins": 1.319215178489685, "rewards/rejected": -2.780116319656372, "step": 1010 }, { "epoch": 0.27, "learning_rate": 4.588300987450652e-06, "logits/chosen": -0.7461687922477722, "logits/rejected": -0.11924894899129868, "logps/chosen": -504.3556213378906, "logps/rejected": -709.5269165039062, "loss": 0.4198, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.595538854598999, "rewards/margins": 1.395408272743225, "rewards/rejected": -2.9909470081329346, "step": 1020 }, { "epoch": 0.27, "learning_rate": 4.5756548370922136e-06, "logits/chosen": -0.9196340441703796, "logits/rejected": 0.15583333373069763, "logps/chosen": -467.1847229003906, "logps/rejected": -698.4019165039062, "loss": 0.471, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.4349987506866455, "rewards/margins": 1.6199867725372314, "rewards/rejected": -3.054985523223877, "step": 1030 }, { "epoch": 0.27, "learning_rate": 4.562835370152206e-06, "logits/chosen": -0.9880784153938293, "logits/rejected": -0.4833584427833557, "logps/chosen": -474.13946533203125, "logps/rejected": -677.9033813476562, "loss": 0.4454, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.532492756843567, "rewards/margins": 1.2476747035980225, "rewards/rejected": -2.7801673412323, "step": 1040 }, { "epoch": 0.27, "learning_rate": 4.54984365705243e-06, "logits/chosen": -0.9044283628463745, "logits/rejected": 0.2612631916999817, "logps/chosen": -487.19573974609375, "logps/rejected": -666.8839721679688, "loss": 0.4112, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.5281569957733154, "rewards/margins": 1.2005985975265503, "rewards/rejected": -2.728755474090576, "step": 1050 }, { "epoch": 0.28, "learning_rate": 4.536680782597191e-06, "logits/chosen": -1.1122404336929321, "logits/rejected": 0.34001001715660095, "logps/chosen": -424.924560546875, "logps/rejected": -637.4059448242188, "loss": 0.4054, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.2176024913787842, "rewards/margins": 1.382258653640747, "rewards/rejected": -2.5998611450195312, "step": 1060 }, { "epoch": 0.28, "learning_rate": 4.523347845882718e-06, "logits/chosen": -1.2397373914718628, "logits/rejected": -0.0026629925705492496, "logps/chosen": -473.2481994628906, "logps/rejected": -670.2132568359375, "loss": 0.472, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.3779747486114502, "rewards/margins": 1.2943679094314575, "rewards/rejected": -2.672342300415039, "step": 1070 }, { "epoch": 0.28, "learning_rate": 4.50984596020539e-06, "logits/chosen": -1.0471652746200562, "logits/rejected": -0.250629723072052, "logps/chosen": -507.221435546875, "logps/rejected": -730.1007080078125, "loss": 0.4251, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.5079420804977417, "rewards/margins": 1.3903275728225708, "rewards/rejected": -2.8982696533203125, "step": 1080 }, { "epoch": 0.29, "learning_rate": 4.4961762529687745e-06, "logits/chosen": -0.47195902466773987, "logits/rejected": -0.18957489728927612, "logps/chosen": -446.6513671875, "logps/rejected": -749.2055053710938, "loss": 0.3931, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -1.421588659286499, "rewards/margins": 1.843101143836975, "rewards/rejected": -3.2646899223327637, "step": 1090 }, { "epoch": 0.29, "learning_rate": 4.482339865589492e-06, "logits/chosen": -0.6734046936035156, "logits/rejected": 0.2455734759569168, "logps/chosen": -536.9996337890625, "logps/rejected": -779.7747802734375, "loss": 0.3976, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.8160717487335205, "rewards/margins": 1.6105674505233765, "rewards/rejected": -3.4266390800476074, "step": 1100 }, { "epoch": 0.29, "learning_rate": 4.468337953401909e-06, "logits/chosen": -0.7846983075141907, "logits/rejected": 0.3714667558670044, "logps/chosen": -643.3924560546875, "logps/rejected": -889.9669189453125, "loss": 0.4624, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.112175703048706, "rewards/margins": 1.6929798126220703, "rewards/rejected": -3.8051555156707764, "step": 1110 }, { "epoch": 0.29, "learning_rate": 4.45417168556166e-06, "logits/chosen": -0.887243390083313, "logits/rejected": 0.19038431346416473, "logps/chosen": -501.67987060546875, "logps/rejected": -794.1953735351562, "loss": 0.4672, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.6879377365112305, "rewards/margins": 1.825568437576294, "rewards/rejected": -3.5135064125061035, "step": 1120 }, { "epoch": 0.3, "learning_rate": 4.439842244948036e-06, "logits/chosen": -1.2177503108978271, "logits/rejected": -0.813552975654602, "logps/chosen": -524.879150390625, "logps/rejected": -667.7706298828125, "loss": 0.4243, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5180702209472656, "rewards/margins": 1.0134727954864502, "rewards/rejected": -2.531543016433716, "step": 1130 }, { "epoch": 0.3, "learning_rate": 4.425350828065204e-06, "logits/chosen": -1.0455009937286377, "logits/rejected": 0.2684328556060791, "logps/chosen": -534.6132202148438, "logps/rejected": -721.2943115234375, "loss": 0.3871, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7661949396133423, "rewards/margins": 1.332626223564148, "rewards/rejected": -3.0988211631774902, "step": 1140 }, { "epoch": 0.3, "learning_rate": 4.410698644942303e-06, "logits/chosen": -0.8786664009094238, "logits/rejected": 0.14112402498722076, "logps/chosen": -458.5977478027344, "logps/rejected": -710.25927734375, "loss": 0.3959, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.411984920501709, "rewards/margins": 1.6877896785736084, "rewards/rejected": -3.0997745990753174, "step": 1150 }, { "epoch": 0.3, "learning_rate": 4.395886919032406e-06, "logits/chosen": -0.8496414422988892, "logits/rejected": -0.006237986497581005, "logps/chosen": -508.9517517089844, "logps/rejected": -735.8714599609375, "loss": 0.4109, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.5958458185195923, "rewards/margins": 1.4991271495819092, "rewards/rejected": -3.094973087310791, "step": 1160 }, { "epoch": 0.31, "learning_rate": 4.380916887110366e-06, "logits/chosen": -0.7842418551445007, "logits/rejected": -0.08200596272945404, "logps/chosen": -496.2059020996094, "logps/rejected": -729.1738891601562, "loss": 0.3737, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.5399061441421509, "rewards/margins": 1.5425506830215454, "rewards/rejected": -3.0824568271636963, "step": 1170 }, { "epoch": 0.31, "learning_rate": 4.365789799169539e-06, "logits/chosen": -1.2651920318603516, "logits/rejected": 0.12391755729913712, "logps/chosen": -512.1771850585938, "logps/rejected": -717.8344116210938, "loss": 0.4228, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.5166397094726562, "rewards/margins": 1.4622437953948975, "rewards/rejected": -2.9788835048675537, "step": 1180 }, { "epoch": 0.31, "learning_rate": 4.350506918317416e-06, "logits/chosen": -0.9895895719528198, "logits/rejected": -0.03255582973361015, "logps/chosen": -519.2945556640625, "logps/rejected": -687.9844970703125, "loss": 0.4106, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.6044280529022217, "rewards/margins": 1.4460744857788086, "rewards/rejected": -3.050502300262451, "step": 1190 }, { "epoch": 0.31, "learning_rate": 4.335069520670149e-06, "logits/chosen": -0.8492997884750366, "logits/rejected": -0.5858234167098999, "logps/chosen": -516.3770141601562, "logps/rejected": -707.2117309570312, "loss": 0.4034, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.7651269435882568, "rewards/margins": 1.3661835193634033, "rewards/rejected": -3.1313109397888184, "step": 1200 }, { "epoch": 0.31, "eval_logits/chosen": 0.7243556976318359, "eval_logits/rejected": 1.7155570983886719, "eval_logps/chosen": -545.945068359375, "eval_logps/rejected": -770.7060546875, "eval_loss": 0.44869744777679443, "eval_rewards/accuracies": 0.7870000004768372, "eval_rewards/chosen": -1.9028427600860596, "eval_rewards/margins": 1.614193320274353, "eval_rewards/rejected": -3.517036199569702, "eval_runtime": 1383.7156, "eval_samples_per_second": 1.445, "eval_steps_per_second": 0.361, "step": 1200 }, { "epoch": 0.32, "learning_rate": 4.319478895246e-06, "logits/chosen": -0.6312126517295837, "logits/rejected": 0.010389542207121849, "logps/chosen": -485.5673828125, "logps/rejected": -724.3965454101562, "loss": 0.4596, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.7340030670166016, "rewards/margins": 1.5444698333740234, "rewards/rejected": -3.278472900390625, "step": 1210 }, { "epoch": 0.32, "learning_rate": 4.303736343857704e-06, "logits/chosen": -0.5306238532066345, "logits/rejected": 0.09534727036952972, "logps/chosen": -501.3701171875, "logps/rejected": -773.635498046875, "loss": 0.4065, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.6401008367538452, "rewards/margins": 1.739175796508789, "rewards/rejected": -3.379276752471924, "step": 1220 }, { "epoch": 0.32, "learning_rate": 4.287843181003772e-06, "logits/chosen": -0.7599018812179565, "logits/rejected": 0.544152557849884, "logps/chosen": -492.7386779785156, "logps/rejected": -739.1934814453125, "loss": 0.3467, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.591827392578125, "rewards/margins": 1.6447445154190063, "rewards/rejected": -3.236571788787842, "step": 1230 }, { "epoch": 0.32, "learning_rate": 4.27180073375873e-06, "logits/chosen": -0.7321812510490417, "logits/rejected": -0.04632633179426193, "logps/chosen": -503.90655517578125, "logps/rejected": -751.9849243164062, "loss": 0.4352, "rewards/accuracies": 0.75, "rewards/chosen": -1.7118949890136719, "rewards/margins": 1.5225197076797485, "rewards/rejected": -3.234414577484131, "step": 1240 }, { "epoch": 0.33, "learning_rate": 4.255610341662304e-06, "logits/chosen": -0.8730325698852539, "logits/rejected": -0.1791534423828125, "logps/chosen": -524.244140625, "logps/rejected": -752.1405029296875, "loss": 0.3827, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7566620111465454, "rewards/margins": 1.7185337543487549, "rewards/rejected": -3.4751956462860107, "step": 1250 }, { "epoch": 0.33, "learning_rate": 4.2392733566075764e-06, "logits/chosen": -0.8349526524543762, "logits/rejected": 0.03989090770483017, "logps/chosen": -555.2600708007812, "logps/rejected": -799.1808471679688, "loss": 0.4153, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8972892761230469, "rewards/margins": 1.5951424837112427, "rewards/rejected": -3.492431640625, "step": 1260 }, { "epoch": 0.33, "learning_rate": 4.2227911427280975e-06, "logits/chosen": -1.2052414417266846, "logits/rejected": 0.40989890694618225, "logps/chosen": -535.0908203125, "logps/rejected": -766.23974609375, "loss": 0.4851, "rewards/accuracies": 0.875, "rewards/chosen": -1.7357299327850342, "rewards/margins": 1.7371448278427124, "rewards/rejected": -3.472874879837036, "step": 1270 }, { "epoch": 0.33, "learning_rate": 4.206165076283983e-06, "logits/chosen": -1.3161356449127197, "logits/rejected": -0.31805121898651123, "logps/chosen": -562.9700927734375, "logps/rejected": -699.4861450195312, "loss": 0.4345, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.6805912256240845, "rewards/margins": 1.0278584957122803, "rewards/rejected": -2.708449602127075, "step": 1280 }, { "epoch": 0.34, "learning_rate": 4.189396545546995e-06, "logits/chosen": -1.2732408046722412, "logits/rejected": -0.36048611998558044, "logps/chosen": -463.95849609375, "logps/rejected": -677.6444091796875, "loss": 0.4661, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.385292649269104, "rewards/margins": 1.3424113988876343, "rewards/rejected": -2.7277040481567383, "step": 1290 }, { "epoch": 0.34, "learning_rate": 4.172486950684627e-06, "logits/chosen": -1.3132286071777344, "logits/rejected": 0.031231578439474106, "logps/chosen": -535.4584350585938, "logps/rejected": -707.3728637695312, "loss": 0.439, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.6812185049057007, "rewards/margins": 1.3230525255203247, "rewards/rejected": -3.0042712688446045, "step": 1300 }, { "epoch": 0.34, "learning_rate": 4.155437703643182e-06, "logits/chosen": -1.2260167598724365, "logits/rejected": -0.5245649814605713, "logps/chosen": -462.94403076171875, "logps/rejected": -694.3932495117188, "loss": 0.4018, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4110779762268066, "rewards/margins": 1.4465053081512451, "rewards/rejected": -2.8575832843780518, "step": 1310 }, { "epoch": 0.35, "learning_rate": 4.138250228029882e-06, "logits/chosen": -1.0029791593551636, "logits/rejected": -0.45876234769821167, "logps/chosen": -471.4588928222656, "logps/rejected": -711.7996215820312, "loss": 0.3972, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.6702163219451904, "rewards/margins": 1.6204955577850342, "rewards/rejected": -3.2907118797302246, "step": 1320 }, { "epoch": 0.35, "learning_rate": 4.120925958993994e-06, "logits/chosen": -0.5831348299980164, "logits/rejected": -0.2067866027355194, "logps/chosen": -476.1014099121094, "logps/rejected": -722.8709106445312, "loss": 0.4493, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.6658756732940674, "rewards/margins": 1.6795238256454468, "rewards/rejected": -3.3453993797302246, "step": 1330 }, { "epoch": 0.35, "learning_rate": 4.103466343106999e-06, "logits/chosen": -1.077728033065796, "logits/rejected": 0.1974649727344513, "logps/chosen": -636.1361083984375, "logps/rejected": -829.7213134765625, "loss": 0.5001, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.2163033485412598, "rewards/margins": 1.5475972890853882, "rewards/rejected": -3.7639007568359375, "step": 1340 }, { "epoch": 0.35, "learning_rate": 4.085872838241797e-06, "logits/chosen": -0.8451377749443054, "logits/rejected": -0.4800887703895569, "logps/chosen": -523.9595947265625, "logps/rejected": -813.6463012695312, "loss": 0.4193, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.937745451927185, "rewards/margins": 1.8354793787002563, "rewards/rejected": -3.7732245922088623, "step": 1350 }, { "epoch": 0.36, "learning_rate": 4.06814691345098e-06, "logits/chosen": -0.8073797225952148, "logits/rejected": -0.67634516954422, "logps/chosen": -499.55364990234375, "logps/rejected": -725.6375732421875, "loss": 0.5085, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8341518640518188, "rewards/margins": 1.3492587804794312, "rewards/rejected": -3.18341064453125, "step": 1360 }, { "epoch": 0.36, "learning_rate": 4.050290048844171e-06, "logits/chosen": -1.3967281579971313, "logits/rejected": -0.1124715581536293, "logps/chosen": -586.6700439453125, "logps/rejected": -785.9407958984375, "loss": 0.4425, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.6672008037567139, "rewards/margins": 1.4039936065673828, "rewards/rejected": -3.0711944103240967, "step": 1370 }, { "epoch": 0.36, "learning_rate": 4.032303735464422e-06, "logits/chosen": -1.3250014781951904, "logits/rejected": -0.28036853671073914, "logps/chosen": -523.9191284179688, "logps/rejected": -757.8228759765625, "loss": 0.3506, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.4709548950195312, "rewards/margins": 1.7138077020645142, "rewards/rejected": -3.184762477874756, "step": 1380 }, { "epoch": 0.36, "learning_rate": 4.014189475163727e-06, "logits/chosen": -0.9671980142593384, "logits/rejected": -0.292216956615448, "logps/chosen": -559.5821533203125, "logps/rejected": -784.157958984375, "loss": 0.3887, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.7415062189102173, "rewards/margins": 1.629124641418457, "rewards/rejected": -3.370630979537964, "step": 1390 }, { "epoch": 0.37, "learning_rate": 3.995948780477605e-06, "logits/chosen": -1.1137468814849854, "logits/rejected": -0.3016485273838043, "logps/chosen": -586.1868286132812, "logps/rejected": -790.1680908203125, "loss": 0.4715, "rewards/accuracies": 0.75, "rewards/chosen": -1.8678308725357056, "rewards/margins": 1.3062608242034912, "rewards/rejected": -3.1740918159484863, "step": 1400 }, { "epoch": 0.37, "learning_rate": 3.977583174498816e-06, "logits/chosen": -0.9699400067329407, "logits/rejected": -0.7510320544242859, "logps/chosen": -545.1305541992188, "logps/rejected": -793.4750366210938, "loss": 0.4399, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.7863260507583618, "rewards/margins": 1.481147050857544, "rewards/rejected": -3.267472743988037, "step": 1410 }, { "epoch": 0.37, "learning_rate": 3.959094190750172e-06, "logits/chosen": -1.3356729745864868, "logits/rejected": -0.6711053848266602, "logps/chosen": -574.8829956054688, "logps/rejected": -773.9825439453125, "loss": 0.3984, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.7231754064559937, "rewards/margins": 1.5199847221374512, "rewards/rejected": -3.2431602478027344, "step": 1420 }, { "epoch": 0.37, "learning_rate": 3.9404833730564975e-06, "logits/chosen": -1.2241183519363403, "logits/rejected": -0.7589890956878662, "logps/chosen": -524.2040405273438, "logps/rejected": -684.8192138671875, "loss": 0.4751, "rewards/accuracies": 0.8125, "rewards/chosen": -1.550222396850586, "rewards/margins": 1.2541849613189697, "rewards/rejected": -2.8044073581695557, "step": 1430 }, { "epoch": 0.38, "learning_rate": 3.921752275415712e-06, "logits/chosen": -1.2616220712661743, "logits/rejected": -0.6698473691940308, "logps/chosen": -494.5272521972656, "logps/rejected": -719.5067749023438, "loss": 0.407, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5580322742462158, "rewards/margins": 1.4134986400604248, "rewards/rejected": -2.9715309143066406, "step": 1440 }, { "epoch": 0.38, "learning_rate": 3.902902461869079e-06, "logits/chosen": -0.9971591234207153, "logits/rejected": -0.4130152761936188, "logps/chosen": -407.35736083984375, "logps/rejected": -699.933837890625, "loss": 0.3878, "rewards/accuracies": 0.875, "rewards/chosen": -1.3271167278289795, "rewards/margins": 1.8487539291381836, "rewards/rejected": -3.175870895385742, "step": 1450 }, { "epoch": 0.38, "learning_rate": 3.883935506370605e-06, "logits/chosen": -1.3185368776321411, "logits/rejected": -0.5751763582229614, "logps/chosen": -538.9137573242188, "logps/rejected": -727.714599609375, "loss": 0.3787, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7377328872680664, "rewards/margins": 1.1966904401779175, "rewards/rejected": -2.9344232082366943, "step": 1460 }, { "epoch": 0.38, "learning_rate": 3.864852992655617e-06, "logits/chosen": -1.2166504859924316, "logits/rejected": -0.45289698243141174, "logps/chosen": -516.9674682617188, "logps/rejected": -733.4182739257812, "loss": 0.3926, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6966655254364014, "rewards/margins": 1.5359889268875122, "rewards/rejected": -3.232654571533203, "step": 1470 }, { "epoch": 0.39, "learning_rate": 3.845656514108516e-06, "logits/chosen": -1.2518984079360962, "logits/rejected": -0.23703515529632568, "logps/chosen": -571.0328979492188, "logps/rejected": -839.0745239257812, "loss": 0.409, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.9428062438964844, "rewards/margins": 1.905398964881897, "rewards/rejected": -3.84820556640625, "step": 1480 }, { "epoch": 0.39, "learning_rate": 3.826347673629738e-06, "logits/chosen": -1.241381049156189, "logits/rejected": -0.424204021692276, "logps/chosen": -517.6029052734375, "logps/rejected": -811.5399169921875, "loss": 0.4235, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.9763988256454468, "rewards/margins": 1.8420568704605103, "rewards/rejected": -3.818455457687378, "step": 1490 }, { "epoch": 0.39, "learning_rate": 3.8069280835019062e-06, "logits/chosen": -1.3552948236465454, "logits/rejected": -0.24583733081817627, "logps/chosen": -539.5227661132812, "logps/rejected": -833.4625244140625, "loss": 0.4193, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.008434295654297, "rewards/margins": 2.0153846740722656, "rewards/rejected": -4.0238189697265625, "step": 1500 }, { "epoch": 0.39, "eval_logits/chosen": 0.0019010701216757298, "eval_logits/rejected": 0.9998253583908081, "eval_logps/chosen": -544.3021240234375, "eval_logps/rejected": -767.47119140625, "eval_loss": 0.44199585914611816, "eval_rewards/accuracies": 0.7839999794960022, "eval_rewards/chosen": -1.8864127397537231, "eval_rewards/margins": 1.5982747077941895, "eval_rewards/rejected": -3.484687566757202, "eval_runtime": 1381.698, "eval_samples_per_second": 1.447, "eval_steps_per_second": 0.362, "step": 1500 }, { "epoch": 0.4, "learning_rate": 3.7873993652552077e-06, "logits/chosen": -1.0648901462554932, "logits/rejected": -0.4144531786441803, "logps/chosen": -475.7496643066406, "logps/rejected": -861.4631958007812, "loss": 0.3316, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -1.5341570377349854, "rewards/margins": 2.4849419593811035, "rewards/rejected": -4.019099235534668, "step": 1510 }, { "epoch": 0.4, "learning_rate": 3.7677631495319953e-06, "logits/chosen": -1.0717235803604126, "logits/rejected": -0.44138726592063904, "logps/chosen": -545.8651733398438, "logps/rejected": -788.2182006835938, "loss": 0.4331, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6811964511871338, "rewards/margins": 1.6335046291351318, "rewards/rejected": -3.3147010803222656, "step": 1520 }, { "epoch": 0.4, "learning_rate": 3.748021075950633e-06, "logits/chosen": -1.231894850730896, "logits/rejected": -0.649477481842041, "logps/chosen": -472.5684509277344, "logps/rejected": -648.9671630859375, "loss": 0.4403, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6376155614852905, "rewards/margins": 1.3435395956039429, "rewards/rejected": -2.9811549186706543, "step": 1530 }, { "epoch": 0.4, "learning_rate": 3.7281747929685824e-06, "logits/chosen": -0.8811131715774536, "logits/rejected": -0.19349880516529083, "logps/chosen": -556.5198364257812, "logps/rejected": -868.0969848632812, "loss": 0.3711, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -1.9225609302520752, "rewards/margins": 2.121070384979248, "rewards/rejected": -4.043631076812744, "step": 1540 }, { "epoch": 0.41, "learning_rate": 3.7082259577447604e-06, "logits/chosen": -1.3660091161727905, "logits/rejected": -0.37334832549095154, "logps/chosen": -508.5003356933594, "logps/rejected": -700.1402587890625, "loss": 0.4505, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.6233818531036377, "rewards/margins": 1.5405502319335938, "rewards/rejected": -3.1639320850372314, "step": 1550 }, { "epoch": 0.41, "learning_rate": 3.6881762360011688e-06, "logits/chosen": -0.9465176463127136, "logits/rejected": -0.1583428531885147, "logps/chosen": -568.060302734375, "logps/rejected": -805.0638427734375, "loss": 0.3998, "rewards/accuracies": 0.8125, "rewards/chosen": -2.06211256980896, "rewards/margins": 1.7412744760513306, "rewards/rejected": -3.80338716506958, "step": 1560 }, { "epoch": 0.41, "learning_rate": 3.668027301883802e-06, "logits/chosen": -0.6530407667160034, "logits/rejected": 0.17987249791622162, "logps/chosen": -569.39013671875, "logps/rejected": -819.4601440429688, "loss": 0.3991, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.138000726699829, "rewards/margins": 1.7371118068695068, "rewards/rejected": -3.8751120567321777, "step": 1570 }, { "epoch": 0.41, "learning_rate": 3.64778083782286e-06, "logits/chosen": -1.1196366548538208, "logits/rejected": 0.07997065782546997, "logps/chosen": -588.8242797851562, "logps/rejected": -888.84619140625, "loss": 0.3689, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -2.009155750274658, "rewards/margins": 2.2112624645233154, "rewards/rejected": -4.220418453216553, "step": 1580 }, { "epoch": 0.42, "learning_rate": 3.627438534392268e-06, "logits/chosen": -1.2351372241973877, "logits/rejected": -0.21363726258277893, "logps/chosen": -565.1137084960938, "logps/rejected": -799.2964477539062, "loss": 0.4302, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9956506490707397, "rewards/margins": 1.7617241144180298, "rewards/rejected": -3.7573745250701904, "step": 1590 }, { "epoch": 0.42, "learning_rate": 3.607002090168506e-06, "logits/chosen": -1.1077851057052612, "logits/rejected": -0.3146376609802246, "logps/chosen": -519.68505859375, "logps/rejected": -786.7335815429688, "loss": 0.3644, "rewards/accuracies": 0.875, "rewards/chosen": -1.6840426921844482, "rewards/margins": 1.7921777963638306, "rewards/rejected": -3.4762203693389893, "step": 1600 }, { "epoch": 0.42, "learning_rate": 3.586473211588787e-06, "logits/chosen": -1.0645841360092163, "logits/rejected": 0.0885920599102974, "logps/chosen": -530.0689697265625, "logps/rejected": -746.6349487304688, "loss": 0.4293, "rewards/accuracies": 0.75, "rewards/chosen": -1.7966455221176147, "rewards/margins": 1.5383833646774292, "rewards/rejected": -3.335028886795044, "step": 1610 }, { "epoch": 0.42, "learning_rate": 3.5658536128085623e-06, "logits/chosen": -0.8512083292007446, "logits/rejected": -0.07845296710729599, "logps/chosen": -522.5213623046875, "logps/rejected": -783.8287353515625, "loss": 0.4465, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.6679248809814453, "rewards/margins": 1.7967729568481445, "rewards/rejected": -3.464698076248169, "step": 1620 }, { "epoch": 0.43, "learning_rate": 3.545145015558399e-06, "logits/chosen": -1.1375799179077148, "logits/rejected": -0.7033450603485107, "logps/chosen": -488.27825927734375, "logps/rejected": -743.138427734375, "loss": 0.4717, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5062588453292847, "rewards/margins": 1.479614019393921, "rewards/rejected": -2.985872983932495, "step": 1630 }, { "epoch": 0.43, "learning_rate": 3.5243491490002056e-06, "logits/chosen": -1.122290849685669, "logits/rejected": -0.7316358685493469, "logps/chosen": -507.69036865234375, "logps/rejected": -690.9684448242188, "loss": 0.4651, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.5151287317276, "rewards/margins": 1.2003190517425537, "rewards/rejected": -2.7154476642608643, "step": 1640 }, { "epoch": 0.43, "learning_rate": 3.503467749582857e-06, "logits/chosen": -1.7979711294174194, "logits/rejected": -0.23376531898975372, "logps/chosen": -447.5152282714844, "logps/rejected": -644.4968872070312, "loss": 0.4207, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.3395702838897705, "rewards/margins": 1.2873914241790771, "rewards/rejected": -2.6269614696502686, "step": 1650 }, { "epoch": 0.43, "learning_rate": 3.4825025608971947e-06, "logits/chosen": -1.1949323415756226, "logits/rejected": -0.4591255784034729, "logps/chosen": -531.0167846679688, "logps/rejected": -708.4334716796875, "loss": 0.4157, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6559035778045654, "rewards/margins": 1.3914124965667725, "rewards/rejected": -3.047316074371338, "step": 1660 }, { "epoch": 0.44, "learning_rate": 3.4614553335304407e-06, "logits/chosen": -0.9112270474433899, "logits/rejected": -0.7079882025718689, "logps/chosen": -477.55645751953125, "logps/rejected": -665.2545166015625, "loss": 0.4351, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.638381004333496, "rewards/margins": 0.9713879823684692, "rewards/rejected": -2.609769105911255, "step": 1670 }, { "epoch": 0.44, "learning_rate": 3.4403278249200222e-06, "logits/chosen": -1.1047728061676025, "logits/rejected": -0.209273099899292, "logps/chosen": -513.626708984375, "logps/rejected": -751.2394409179688, "loss": 0.3962, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.6597833633422852, "rewards/margins": 1.5487313270568848, "rewards/rejected": -3.20851469039917, "step": 1680 }, { "epoch": 0.44, "learning_rate": 3.4191217992068293e-06, "logits/chosen": -1.1829806566238403, "logits/rejected": 0.04266662523150444, "logps/chosen": -560.150634765625, "logps/rejected": -809.93896484375, "loss": 0.4279, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8385143280029297, "rewards/margins": 1.672101378440857, "rewards/rejected": -3.510615825653076, "step": 1690 }, { "epoch": 0.44, "learning_rate": 3.3978390270879056e-06, "logits/chosen": -1.1551451683044434, "logits/rejected": -0.16915690898895264, "logps/chosen": -546.1964111328125, "logps/rejected": -733.3271484375, "loss": 0.4386, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.6938936710357666, "rewards/margins": 1.5033698081970215, "rewards/rejected": -3.197263240814209, "step": 1700 }, { "epoch": 0.45, "learning_rate": 3.3764812856685995e-06, "logits/chosen": -1.0993391275405884, "logits/rejected": -0.748005211353302, "logps/chosen": -503.6968688964844, "logps/rejected": -733.4053955078125, "loss": 0.4426, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.7742856740951538, "rewards/margins": 1.4196711778640747, "rewards/rejected": -3.1939570903778076, "step": 1710 }, { "epoch": 0.45, "learning_rate": 3.3550503583141726e-06, "logits/chosen": -1.1645628213882446, "logits/rejected": -0.29861804842948914, "logps/chosen": -516.9429931640625, "logps/rejected": -731.75634765625, "loss": 0.4486, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.7132374048233032, "rewards/margins": 1.4840338230133057, "rewards/rejected": -3.1972713470458984, "step": 1720 }, { "epoch": 0.45, "learning_rate": 3.3335480345008907e-06, "logits/chosen": -1.0770137310028076, "logits/rejected": -0.3164665400981903, "logps/chosen": -475.98345947265625, "logps/rejected": -717.9310302734375, "loss": 0.3996, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.5537554025650024, "rewards/margins": 1.6571037769317627, "rewards/rejected": -3.2108588218688965, "step": 1730 }, { "epoch": 0.46, "learning_rate": 3.3119761096666055e-06, "logits/chosen": -1.5323493480682373, "logits/rejected": -0.1781812459230423, "logps/chosen": -540.9231567382812, "logps/rejected": -732.0032958984375, "loss": 0.3972, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.7525060176849365, "rewards/margins": 1.5016968250274658, "rewards/rejected": -3.254202365875244, "step": 1740 }, { "epoch": 0.46, "learning_rate": 3.290336385060832e-06, "logits/chosen": -1.1251745223999023, "logits/rejected": -0.11602558940649033, "logps/chosen": -634.4666748046875, "logps/rejected": -833.8592529296875, "loss": 0.3915, "rewards/accuracies": 0.75, "rewards/chosen": -2.158566951751709, "rewards/margins": 1.6248620748519897, "rewards/rejected": -3.783428907394409, "step": 1750 }, { "epoch": 0.46, "learning_rate": 3.268630667594348e-06, "logits/chosen": -0.6574426889419556, "logits/rejected": -0.43358176946640015, "logps/chosen": -525.9124755859375, "logps/rejected": -769.3137817382812, "loss": 0.4214, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.048187017440796, "rewards/margins": 1.6054658889770508, "rewards/rejected": -3.6536529064178467, "step": 1760 }, { "epoch": 0.46, "learning_rate": 3.2468607696883147e-06, "logits/chosen": -1.0191020965576172, "logits/rejected": -0.8884264230728149, "logps/chosen": -542.0821533203125, "logps/rejected": -717.0391845703125, "loss": 0.439, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9124071598052979, "rewards/margins": 1.0934618711471558, "rewards/rejected": -3.0058693885803223, "step": 1770 }, { "epoch": 0.47, "learning_rate": 3.225028509122944e-06, "logits/chosen": -0.6006935834884644, "logits/rejected": -0.7735892534255981, "logps/chosen": -506.019287109375, "logps/rejected": -766.4857177734375, "loss": 0.4162, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.817831039428711, "rewards/margins": 1.61776602268219, "rewards/rejected": -3.4355969429016113, "step": 1780 }, { "epoch": 0.47, "learning_rate": 3.2031357088857083e-06, "logits/chosen": -1.139946699142456, "logits/rejected": -0.35632461309432983, "logps/chosen": -478.1253967285156, "logps/rejected": -795.1430053710938, "loss": 0.3478, "rewards/accuracies": 0.875, "rewards/chosen": -1.6329262256622314, "rewards/margins": 2.2241363525390625, "rewards/rejected": -3.857062816619873, "step": 1790 }, { "epoch": 0.47, "learning_rate": 3.181184197019127e-06, "logits/chosen": -1.2290761470794678, "logits/rejected": -0.23889155685901642, "logps/chosen": -565.7088623046875, "logps/rejected": -842.8005981445312, "loss": 0.409, "rewards/accuracies": 0.8125, "rewards/chosen": -2.107991933822632, "rewards/margins": 2.049304485321045, "rewards/rejected": -4.157296180725098, "step": 1800 }, { "epoch": 0.47, "eval_logits/chosen": 0.5340744853019714, "eval_logits/rejected": 1.4875802993774414, "eval_logps/chosen": -561.5723266601562, "eval_logps/rejected": -791.2130126953125, "eval_loss": 0.43651697039604187, "eval_rewards/accuracies": 0.7919999957084656, "eval_rewards/chosen": -2.059115409851074, "eval_rewards/margins": 1.6629897356033325, "eval_rewards/rejected": -3.7221052646636963, "eval_runtime": 1381.695, "eval_samples_per_second": 1.447, "eval_steps_per_second": 0.362, "step": 1800 }, { "epoch": 0.47, "learning_rate": 3.159175806468126e-06, "logits/chosen": -1.0473666191101074, "logits/rejected": -0.06902176141738892, "logps/chosen": -595.341552734375, "logps/rejected": -831.9974365234375, "loss": 0.4152, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.9551061391830444, "rewards/margins": 1.7293351888656616, "rewards/rejected": -3.684441328048706, "step": 1810 }, { "epoch": 0.48, "learning_rate": 3.1371123749269804e-06, "logits/chosen": -1.026745319366455, "logits/rejected": -0.4049917757511139, "logps/chosen": -565.2854614257812, "logps/rejected": -764.7696533203125, "loss": 0.4013, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9236133098602295, "rewards/margins": 1.5329084396362305, "rewards/rejected": -3.456521511077881, "step": 1820 }, { "epoch": 0.48, "learning_rate": 3.114995744685877e-06, "logits/chosen": -0.9977224469184875, "logits/rejected": -0.40141773223876953, "logps/chosen": -466.9925842285156, "logps/rejected": -759.6148071289062, "loss": 0.4686, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.7426468133926392, "rewards/margins": 1.7171356678009033, "rewards/rejected": -3.459782361984253, "step": 1830 }, { "epoch": 0.48, "learning_rate": 3.0928277624770743e-06, "logits/chosen": -1.512317180633545, "logits/rejected": -0.05047481134533882, "logps/chosen": -557.8958129882812, "logps/rejected": -753.0090942382812, "loss": 0.4698, "rewards/accuracies": 0.8125, "rewards/chosen": -1.98238205909729, "rewards/margins": 1.4256618022918701, "rewards/rejected": -3.408043622970581, "step": 1840 }, { "epoch": 0.48, "learning_rate": 3.070610279320708e-06, "logits/chosen": -1.1659696102142334, "logits/rejected": -0.22102081775665283, "logps/chosen": -540.2110595703125, "logps/rejected": -764.9635620117188, "loss": 0.3463, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.8047651052474976, "rewards/margins": 1.6794044971466064, "rewards/rejected": -3.4841697216033936, "step": 1850 }, { "epoch": 0.49, "learning_rate": 3.0483451503702264e-06, "logits/chosen": -0.9429510831832886, "logits/rejected": -0.374999463558197, "logps/chosen": -633.7833862304688, "logps/rejected": -881.3572387695312, "loss": 0.4, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -2.2336530685424805, "rewards/margins": 1.6140598058700562, "rewards/rejected": -3.847712993621826, "step": 1860 }, { "epoch": 0.49, "learning_rate": 3.0260342347574916e-06, "logits/chosen": -0.9639546275138855, "logits/rejected": -0.8156697154045105, "logps/chosen": -493.8570861816406, "logps/rejected": -728.9312744140625, "loss": 0.3857, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.7304465770721436, "rewards/margins": 1.2347511053085327, "rewards/rejected": -2.965198040008545, "step": 1870 }, { "epoch": 0.49, "learning_rate": 3.0036793954375358e-06, "logits/chosen": -1.5436639785766602, "logits/rejected": 0.08726786822080612, "logps/chosen": -512.5186157226562, "logps/rejected": -664.79296875, "loss": 0.4216, "rewards/accuracies": 0.75, "rewards/chosen": -1.7313350439071655, "rewards/margins": 1.4134327173233032, "rewards/rejected": -3.144767999649048, "step": 1880 }, { "epoch": 0.49, "learning_rate": 2.981282499033009e-06, "logits/chosen": -0.6750370264053345, "logits/rejected": -0.8198210597038269, "logps/chosen": -542.04052734375, "logps/rejected": -810.6235961914062, "loss": 0.4386, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -2.0354888439178467, "rewards/margins": 1.7842447757720947, "rewards/rejected": -3.8197338581085205, "step": 1890 }, { "epoch": 0.5, "learning_rate": 2.9588454156783163e-06, "logits/chosen": -1.0820724964141846, "logits/rejected": -0.7213941812515259, "logps/chosen": -541.3480224609375, "logps/rejected": -766.2906494140625, "loss": 0.4257, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.084608554840088, "rewards/margins": 1.3346532583236694, "rewards/rejected": -3.4192614555358887, "step": 1900 }, { "epoch": 0.5, "learning_rate": 2.9363700188634597e-06, "logits/chosen": -1.2453614473342896, "logits/rejected": -0.5085484385490417, "logps/chosen": -540.5175170898438, "logps/rejected": -765.7025756835938, "loss": 0.4321, "rewards/accuracies": 0.8125, "rewards/chosen": -1.895167589187622, "rewards/margins": 1.5056030750274658, "rewards/rejected": -3.4007697105407715, "step": 1910 }, { "epoch": 0.5, "learning_rate": 2.9138581852776053e-06, "logits/chosen": -1.1196304559707642, "logits/rejected": 0.026494156569242477, "logps/chosen": -559.1360473632812, "logps/rejected": -778.654296875, "loss": 0.4461, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9397118091583252, "rewards/margins": 1.5184619426727295, "rewards/rejected": -3.4581737518310547, "step": 1920 }, { "epoch": 0.51, "learning_rate": 2.8913117946523805e-06, "logits/chosen": -1.2101647853851318, "logits/rejected": 0.05119786784052849, "logps/chosen": -497.8490295410156, "logps/rejected": -711.6702270507812, "loss": 0.4391, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.8607673645019531, "rewards/margins": 1.6270701885223389, "rewards/rejected": -3.487837314605713, "step": 1930 }, { "epoch": 0.51, "learning_rate": 2.8687327296049126e-06, "logits/chosen": -0.9110754132270813, "logits/rejected": -0.3987257182598114, "logps/chosen": -488.52056884765625, "logps/rejected": -814.98388671875, "loss": 0.385, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.5353851318359375, "rewards/margins": 2.019871711730957, "rewards/rejected": -3.5552570819854736, "step": 1940 }, { "epoch": 0.51, "learning_rate": 2.8461228754806376e-06, "logits/chosen": -1.2645906209945679, "logits/rejected": 0.05890879034996033, "logps/chosen": -571.50146484375, "logps/rejected": -764.4017944335938, "loss": 0.3804, "rewards/accuracies": 0.8125, "rewards/chosen": -1.847496747970581, "rewards/margins": 1.488777995109558, "rewards/rejected": -3.3362746238708496, "step": 1950 }, { "epoch": 0.51, "learning_rate": 2.823484120195865e-06, "logits/chosen": -1.035412311553955, "logits/rejected": 0.14751215279102325, "logps/chosen": -568.7293090820312, "logps/rejected": -764.1280517578125, "loss": 0.3716, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.9270780086517334, "rewards/margins": 1.58747398853302, "rewards/rejected": -3.514551877975464, "step": 1960 }, { "epoch": 0.52, "learning_rate": 2.8008183540801486e-06, "logits/chosen": -1.152499794960022, "logits/rejected": -0.26493799686431885, "logps/chosen": -610.4190063476562, "logps/rejected": -777.2071533203125, "loss": 0.3893, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.9002234935760498, "rewards/margins": 1.6286661624908447, "rewards/rejected": -3.5288894176483154, "step": 1970 }, { "epoch": 0.52, "learning_rate": 2.7781274697184353e-06, "logits/chosen": -1.2689648866653442, "logits/rejected": 0.2965567111968994, "logps/chosen": -519.8406982421875, "logps/rejected": -776.2745971679688, "loss": 0.451, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.7539297342300415, "rewards/margins": 1.7792565822601318, "rewards/rejected": -3.533186435699463, "step": 1980 }, { "epoch": 0.52, "learning_rate": 2.7554133617930397e-06, "logits/chosen": -1.1502325534820557, "logits/rejected": -0.16516944766044617, "logps/chosen": -586.9942626953125, "logps/rejected": -799.5856323242188, "loss": 0.3986, "rewards/accuracies": 0.75, "rewards/chosen": -1.9755744934082031, "rewards/margins": 1.5421812534332275, "rewards/rejected": -3.5177555084228516, "step": 1990 }, { "epoch": 0.52, "learning_rate": 2.7326779269254363e-06, "logits/chosen": -1.2239593267440796, "logits/rejected": -0.29785847663879395, "logps/chosen": -577.9757080078125, "logps/rejected": -780.0157470703125, "loss": 0.4163, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.8510382175445557, "rewards/margins": 1.496047019958496, "rewards/rejected": -3.3470852375030518, "step": 2000 }, { "epoch": 0.53, "learning_rate": 2.7099230635178954e-06, "logits/chosen": -0.8359963297843933, "logits/rejected": -0.609653115272522, "logps/chosen": -522.3953857421875, "logps/rejected": -836.3997192382812, "loss": 0.3754, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.8302185535430908, "rewards/margins": 1.9239788055419922, "rewards/rejected": -3.754197359085083, "step": 2010 }, { "epoch": 0.53, "learning_rate": 2.6871506715949608e-06, "logits/chosen": -1.2713494300842285, "logits/rejected": 0.4311772286891937, "logps/chosen": -510.71942138671875, "logps/rejected": -779.8484497070312, "loss": 0.2961, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6959383487701416, "rewards/margins": 1.9984363317489624, "rewards/rejected": -3.6943740844726562, "step": 2020 }, { "epoch": 0.53, "learning_rate": 2.6643626526448063e-06, "logits/chosen": -0.745600700378418, "logits/rejected": -0.04406242445111275, "logps/chosen": -545.650390625, "logps/rejected": -726.6231689453125, "loss": 0.4021, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.117701768875122, "rewards/margins": 1.459877848625183, "rewards/rejected": -3.5775794982910156, "step": 2030 }, { "epoch": 0.53, "learning_rate": 2.6415609094604562e-06, "logits/chosen": -0.6423822641372681, "logits/rejected": -0.5382918119430542, "logps/chosen": -555.1259765625, "logps/rejected": -854.6931762695312, "loss": 0.4133, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -2.1485462188720703, "rewards/margins": 1.8231878280639648, "rewards/rejected": -3.9717342853546143, "step": 2040 }, { "epoch": 0.54, "learning_rate": 2.618747345980904e-06, "logits/chosen": -0.942598819732666, "logits/rejected": -0.03942962735891342, "logps/chosen": -508.3501892089844, "logps/rejected": -809.349365234375, "loss": 0.3591, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.922590970993042, "rewards/margins": 2.0935001373291016, "rewards/rejected": -4.016091346740723, "step": 2050 }, { "epoch": 0.54, "learning_rate": 2.595923867132136e-06, "logits/chosen": -0.9166833162307739, "logits/rejected": 0.08796543627977371, "logps/chosen": -561.3500366210938, "logps/rejected": -813.5736083984375, "loss": 0.3879, "rewards/accuracies": 0.8125, "rewards/chosen": -2.1458635330200195, "rewards/margins": 1.7413724660873413, "rewards/rejected": -3.887235641479492, "step": 2060 }, { "epoch": 0.54, "learning_rate": 2.5730923786680672e-06, "logits/chosen": -0.8908794522285461, "logits/rejected": 0.45012766122817993, "logps/chosen": -528.106689453125, "logps/rejected": -768.5990600585938, "loss": 0.3902, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.0598998069763184, "rewards/margins": 1.840790033340454, "rewards/rejected": -3.9006900787353516, "step": 2070 }, { "epoch": 0.54, "learning_rate": 2.5502547870114137e-06, "logits/chosen": -0.44030576944351196, "logits/rejected": 0.27115920186042786, "logps/chosen": -541.02392578125, "logps/rejected": -866.5523681640625, "loss": 0.4173, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.1856839656829834, "rewards/margins": 2.092510223388672, "rewards/rejected": -4.278193473815918, "step": 2080 }, { "epoch": 0.55, "learning_rate": 2.527412999094507e-06, "logits/chosen": -0.960748553276062, "logits/rejected": 0.4776690602302551, "logps/chosen": -641.7643432617188, "logps/rejected": -842.5614013671875, "loss": 0.4317, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.455843448638916, "rewards/margins": 1.5613322257995605, "rewards/rejected": -4.017176151275635, "step": 2090 }, { "epoch": 0.55, "learning_rate": 2.504568922200064e-06, "logits/chosen": -1.032212495803833, "logits/rejected": 0.38921135663986206, "logps/chosen": -526.1879272460938, "logps/rejected": -780.3800659179688, "loss": 0.4037, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.8752429485321045, "rewards/margins": 2.1657333374023438, "rewards/rejected": -4.040976524353027, "step": 2100 }, { "epoch": 0.55, "eval_logits/chosen": 0.9488687515258789, "eval_logits/rejected": 1.9484919309616089, "eval_logps/chosen": -568.4110107421875, "eval_logps/rejected": -807.3529052734375, "eval_loss": 0.43342798948287964, "eval_rewards/accuracies": 0.796999990940094, "eval_rewards/chosen": -2.127501964569092, "eval_rewards/margins": 1.7560021877288818, "eval_rewards/rejected": -3.8835039138793945, "eval_runtime": 1369.4586, "eval_samples_per_second": 1.46, "eval_steps_per_second": 0.365, "step": 2100 }, { "epoch": 0.55, "learning_rate": 2.4817244638019333e-06, "logits/chosen": -0.7853974103927612, "logits/rejected": 0.26276087760925293, "logps/chosen": -576.4677734375, "logps/rejected": -882.1696166992188, "loss": 0.4205, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.413973331451416, "rewards/margins": 1.9889461994171143, "rewards/rejected": -4.402919292449951, "step": 2110 }, { "epoch": 0.55, "learning_rate": 2.4588815314058155e-06, "logits/chosen": -0.7402883768081665, "logits/rejected": 0.13213138282299042, "logps/chosen": -515.807373046875, "logps/rejected": -840.2550048828125, "loss": 0.4364, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9831836223602295, "rewards/margins": 2.3909099102020264, "rewards/rejected": -4.374093532562256, "step": 2120 }, { "epoch": 0.56, "learning_rate": 2.4360420323899922e-06, "logits/chosen": -0.9780701398849487, "logits/rejected": -0.47581759095191956, "logps/chosen": -536.0255126953125, "logps/rejected": -821.7635498046875, "loss": 0.4016, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8594684600830078, "rewards/margins": 1.7201998233795166, "rewards/rejected": -3.5796680450439453, "step": 2130 }, { "epoch": 0.56, "learning_rate": 2.4132078738460585e-06, "logits/chosen": -0.7851123809814453, "logits/rejected": -0.3619709610939026, "logps/chosen": -536.1060791015625, "logps/rejected": -812.8576049804688, "loss": 0.3879, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.774129867553711, "rewards/margins": 1.8565963506698608, "rewards/rejected": -3.6307265758514404, "step": 2140 }, { "epoch": 0.56, "learning_rate": 2.3903809624196826e-06, "logits/chosen": -1.0001862049102783, "logits/rejected": -0.3588159680366516, "logps/chosen": -524.4048461914062, "logps/rejected": -757.30224609375, "loss": 0.4045, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6777470111846924, "rewards/margins": 1.4243619441986084, "rewards/rejected": -3.10210919380188, "step": 2150 }, { "epoch": 0.57, "learning_rate": 2.3675632041513978e-06, "logits/chosen": -0.8742032051086426, "logits/rejected": 0.1250528246164322, "logps/chosen": -591.0662841796875, "logps/rejected": -873.0941162109375, "loss": 0.3503, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.1168835163116455, "rewards/margins": 1.8603336811065674, "rewards/rejected": -3.9772167205810547, "step": 2160 }, { "epoch": 0.57, "learning_rate": 2.3447565043174533e-06, "logits/chosen": -0.9236310124397278, "logits/rejected": -0.3211767077445984, "logps/chosen": -604.6006469726562, "logps/rejected": -848.4435424804688, "loss": 0.3555, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.398642063140869, "rewards/margins": 1.6593701839447021, "rewards/rejected": -4.058012962341309, "step": 2170 }, { "epoch": 0.57, "learning_rate": 2.321962767270724e-06, "logits/chosen": -0.4545938968658447, "logits/rejected": -0.04614262655377388, "logps/chosen": -522.8587646484375, "logps/rejected": -840.7001953125, "loss": 0.424, "rewards/accuracies": 0.8125, "rewards/chosen": -2.117671012878418, "rewards/margins": 2.160691499710083, "rewards/rejected": -4.278363227844238, "step": 2180 }, { "epoch": 0.57, "learning_rate": 2.299183896281692e-06, "logits/chosen": -0.8900741338729858, "logits/rejected": 0.11245179176330566, "logps/chosen": -621.9942626953125, "logps/rejected": -912.0670776367188, "loss": 0.3948, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.5180599689483643, "rewards/margins": 2.2509348392486572, "rewards/rejected": -4.768994331359863, "step": 2190 }, { "epoch": 0.58, "learning_rate": 2.2764217933795297e-06, "logits/chosen": -1.2973425388336182, "logits/rejected": 0.3502582907676697, "logps/chosen": -648.1968994140625, "logps/rejected": -849.2420043945312, "loss": 0.4569, "rewards/accuracies": 0.75, "rewards/chosen": -2.5464279651641846, "rewards/margins": 1.8135731220245361, "rewards/rejected": -4.3600006103515625, "step": 2200 }, { "epoch": 0.58, "learning_rate": 2.2536783591932786e-06, "logits/chosen": -1.0717593431472778, "logits/rejected": -0.04763598367571831, "logps/chosen": -564.8670654296875, "logps/rejected": -862.4423828125, "loss": 0.383, "rewards/accuracies": 0.8125, "rewards/chosen": -2.1421093940734863, "rewards/margins": 2.115607976913452, "rewards/rejected": -4.257718086242676, "step": 2210 }, { "epoch": 0.58, "learning_rate": 2.230955492793149e-06, "logits/chosen": -0.6327053308486938, "logits/rejected": -0.22368088364601135, "logps/chosen": -552.7775268554688, "logps/rejected": -753.1697998046875, "loss": 0.4299, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.939135193824768, "rewards/margins": 1.3090981245040894, "rewards/rejected": -3.2482333183288574, "step": 2220 }, { "epoch": 0.58, "learning_rate": 2.208255091531947e-06, "logits/chosen": -0.9401483535766602, "logits/rejected": 0.13758346438407898, "logps/chosen": -531.6614990234375, "logps/rejected": -746.9278564453125, "loss": 0.4358, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.8913850784301758, "rewards/margins": 1.5273140668869019, "rewards/rejected": -3.418699264526367, "step": 2230 }, { "epoch": 0.59, "learning_rate": 2.1855790508866435e-06, "logits/chosen": -1.4784080982208252, "logits/rejected": 0.4357272982597351, "logps/chosen": -597.9056396484375, "logps/rejected": -766.9682006835938, "loss": 0.4395, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0671706199645996, "rewards/margins": 1.5233346223831177, "rewards/rejected": -3.5905051231384277, "step": 2240 }, { "epoch": 0.59, "learning_rate": 2.162929264300107e-06, "logits/chosen": -1.0196201801300049, "logits/rejected": 0.6838423013687134, "logps/chosen": -581.2928466796875, "logps/rejected": -801.315185546875, "loss": 0.4091, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.0828678607940674, "rewards/margins": 1.6539685726165771, "rewards/rejected": -3.7368361949920654, "step": 2250 }, { "epoch": 0.59, "learning_rate": 2.1403076230230006e-06, "logits/chosen": -0.6144061088562012, "logits/rejected": -0.3462333083152771, "logps/chosen": -555.2684326171875, "logps/rejected": -745.2354736328125, "loss": 0.4423, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8994195461273193, "rewards/margins": 1.2702502012252808, "rewards/rejected": -3.1696696281433105, "step": 2260 }, { "epoch": 0.59, "learning_rate": 2.11771601595586e-06, "logits/chosen": -1.3248759508132935, "logits/rejected": -0.06512956321239471, "logps/chosen": -526.8836669921875, "logps/rejected": -727.4069213867188, "loss": 0.4316, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.7024120092391968, "rewards/margins": 1.4804304838180542, "rewards/rejected": -3.182842493057251, "step": 2270 }, { "epoch": 0.6, "learning_rate": 2.0951563294913737e-06, "logits/chosen": -0.8413470387458801, "logits/rejected": -0.06420852988958359, "logps/chosen": -514.109375, "logps/rejected": -770.6260986328125, "loss": 0.3656, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.8018481731414795, "rewards/margins": 1.71941339969635, "rewards/rejected": -3.5212619304656982, "step": 2280 }, { "epoch": 0.6, "learning_rate": 2.0726304473568693e-06, "logits/chosen": -1.5680863857269287, "logits/rejected": 0.14398300647735596, "logps/chosen": -554.5525512695312, "logps/rejected": -771.2307739257812, "loss": 0.4331, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.7925901412963867, "rewards/margins": 1.641396164894104, "rewards/rejected": -3.433986186981201, "step": 2290 }, { "epoch": 0.6, "learning_rate": 2.050140250457023e-06, "logits/chosen": -1.147033452987671, "logits/rejected": -0.169643372297287, "logps/chosen": -544.5403442382812, "logps/rejected": -771.8599243164062, "loss": 0.411, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.702807068824768, "rewards/margins": 1.5390836000442505, "rewards/rejected": -3.2418906688690186, "step": 2300 }, { "epoch": 0.6, "learning_rate": 2.0276876167168042e-06, "logits/chosen": -1.0450040102005005, "logits/rejected": -0.5411997437477112, "logps/chosen": -464.33636474609375, "logps/rejected": -644.3059692382812, "loss": 0.4458, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.661592721939087, "rewards/margins": 1.0601894855499268, "rewards/rejected": -2.7217824459075928, "step": 2310 }, { "epoch": 0.61, "learning_rate": 2.0052744209246682e-06, "logits/chosen": -0.9377092123031616, "logits/rejected": -0.40263956785202026, "logps/chosen": -498.0686950683594, "logps/rejected": -739.6976318359375, "loss": 0.4114, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.7086822986602783, "rewards/margins": 1.4424405097961426, "rewards/rejected": -3.151122570037842, "step": 2320 }, { "epoch": 0.61, "learning_rate": 1.9829025345760127e-06, "logits/chosen": -0.7755551338195801, "logits/rejected": -0.34596508741378784, "logps/chosen": -440.62872314453125, "logps/rejected": -650.15478515625, "loss": 0.3539, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -1.3974610567092896, "rewards/margins": 1.319804310798645, "rewards/rejected": -2.7172653675079346, "step": 2330 }, { "epoch": 0.61, "learning_rate": 1.9605738257169115e-06, "logits/chosen": -1.3644423484802246, "logits/rejected": -0.3873172700405121, "logps/chosen": -490.96466064453125, "logps/rejected": -707.909423828125, "loss": 0.3942, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.6896295547485352, "rewards/margins": 1.3346831798553467, "rewards/rejected": -3.0243124961853027, "step": 2340 }, { "epoch": 0.62, "learning_rate": 1.9382901587881275e-06, "logits/chosen": -1.3390328884124756, "logits/rejected": -0.3258362114429474, "logps/chosen": -560.3353271484375, "logps/rejected": -710.94091796875, "loss": 0.4555, "rewards/accuracies": 0.75, "rewards/chosen": -1.788905143737793, "rewards/margins": 1.1810920238494873, "rewards/rejected": -2.9699971675872803, "step": 2350 }, { "epoch": 0.62, "learning_rate": 1.916053394469437e-06, "logits/chosen": -1.2866407632827759, "logits/rejected": -0.09522955119609833, "logps/chosen": -534.5072021484375, "logps/rejected": -711.5755004882812, "loss": 0.3582, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.719448447227478, "rewards/margins": 1.4744646549224854, "rewards/rejected": -3.193912982940674, "step": 2360 }, { "epoch": 0.62, "learning_rate": 1.8938653895242604e-06, "logits/chosen": -1.3210828304290771, "logits/rejected": 0.18320707976818085, "logps/chosen": -570.3659057617188, "logps/rejected": -748.9262084960938, "loss": 0.38, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.9527915716171265, "rewards/margins": 1.4824120998382568, "rewards/rejected": -3.4352035522460938, "step": 2370 }, { "epoch": 0.62, "learning_rate": 1.8717279966446267e-06, "logits/chosen": -0.6077791452407837, "logits/rejected": -0.07136712223291397, "logps/chosen": -561.3985595703125, "logps/rejected": -833.6058349609375, "loss": 0.4602, "rewards/accuracies": 0.875, "rewards/chosen": -1.8687776327133179, "rewards/margins": 2.1088478565216064, "rewards/rejected": -3.9776253700256348, "step": 2380 }, { "epoch": 0.63, "learning_rate": 1.8496430642964698e-06, "logits/chosen": -0.6436141133308411, "logits/rejected": -0.7390815615653992, "logps/chosen": -535.98291015625, "logps/rejected": -785.1193237304688, "loss": 0.4384, "rewards/accuracies": 0.875, "rewards/chosen": -1.8680446147918701, "rewards/margins": 1.4708973169326782, "rewards/rejected": -3.338942050933838, "step": 2390 }, { "epoch": 0.63, "learning_rate": 1.827612436565286e-06, "logits/chosen": -0.7472543716430664, "logits/rejected": -0.552331805229187, "logps/chosen": -512.9714965820312, "logps/rejected": -776.1787109375, "loss": 0.3829, "rewards/accuracies": 0.8125, "rewards/chosen": -1.701409101486206, "rewards/margins": 1.6592018604278564, "rewards/rejected": -3.3606104850769043, "step": 2400 }, { "epoch": 0.63, "eval_logits/chosen": 0.5047381520271301, "eval_logits/rejected": 1.5421233177185059, "eval_logps/chosen": -543.5669555664062, "eval_logps/rejected": -768.0193481445312, "eval_loss": 0.4248420000076294, "eval_rewards/accuracies": 0.8009999990463257, "eval_rewards/chosen": -1.879061222076416, "eval_rewards/margins": 1.611107587814331, "eval_rewards/rejected": -3.490169048309326, "eval_runtime": 1378.4799, "eval_samples_per_second": 1.451, "eval_steps_per_second": 0.363, "step": 2400 }, { "epoch": 0.63, "learning_rate": 1.8056379530021492e-06, "logits/chosen": -1.644568681716919, "logits/rejected": 0.2553193271160126, "logps/chosen": -535.88134765625, "logps/rejected": -778.2227783203125, "loss": 0.3769, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.729379415512085, "rewards/margins": 1.8933357000350952, "rewards/rejected": -3.6227145195007324, "step": 2410 }, { "epoch": 0.63, "learning_rate": 1.7837214484701154e-06, "logits/chosen": -1.0312998294830322, "logits/rejected": -0.9269927144050598, "logps/chosen": -492.1142578125, "logps/rejected": -759.744140625, "loss": 0.3937, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -1.6029274463653564, "rewards/margins": 1.807284951210022, "rewards/rejected": -3.410212278366089, "step": 2420 }, { "epoch": 0.64, "learning_rate": 1.7618647529910043e-06, "logits/chosen": -1.2176530361175537, "logits/rejected": 0.223758265376091, "logps/chosen": -529.0576171875, "logps/rejected": -756.2348022460938, "loss": 0.4664, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.851143479347229, "rewards/margins": 1.5376180410385132, "rewards/rejected": -3.388761520385742, "step": 2430 }, { "epoch": 0.64, "learning_rate": 1.7400696915925996e-06, "logits/chosen": -1.2333937883377075, "logits/rejected": -0.22815477848052979, "logps/chosen": -530.9912719726562, "logps/rejected": -742.2661743164062, "loss": 0.396, "rewards/accuracies": 0.8125, "rewards/chosen": -1.743680715560913, "rewards/margins": 1.500349521636963, "rewards/rejected": -3.244030475616455, "step": 2440 }, { "epoch": 0.64, "learning_rate": 1.718338084156254e-06, "logits/chosen": -1.519798994064331, "logits/rejected": -0.11792447417974472, "logps/chosen": -558.024658203125, "logps/rejected": -786.2930908203125, "loss": 0.4233, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.6490389108657837, "rewards/margins": 1.6921495199203491, "rewards/rejected": -3.341188430786133, "step": 2450 }, { "epoch": 0.64, "learning_rate": 1.6966717452649372e-06, "logits/chosen": -1.2660577297210693, "logits/rejected": -0.0037010847590863705, "logps/chosen": -472.52471923828125, "logps/rejected": -651.4722900390625, "loss": 0.4358, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.6838678121566772, "rewards/margins": 1.2879998683929443, "rewards/rejected": -2.971867561340332, "step": 2460 }, { "epoch": 0.65, "learning_rate": 1.6750724840517103e-06, "logits/chosen": -1.2322640419006348, "logits/rejected": -0.4316504895687103, "logps/chosen": -495.26544189453125, "logps/rejected": -720.3032836914062, "loss": 0.4283, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.5014667510986328, "rewards/margins": 1.5642062425613403, "rewards/rejected": -3.0656726360321045, "step": 2470 }, { "epoch": 0.65, "learning_rate": 1.6535421040486686e-06, "logits/chosen": -0.9225546717643738, "logits/rejected": 0.307370662689209, "logps/chosen": -537.8445434570312, "logps/rejected": -764.1954345703125, "loss": 0.4295, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8402236700057983, "rewards/margins": 1.5403947830200195, "rewards/rejected": -3.3806185722351074, "step": 2480 }, { "epoch": 0.65, "learning_rate": 1.6320824030363458e-06, "logits/chosen": -0.8234320878982544, "logits/rejected": -0.4902985095977783, "logps/chosen": -521.9378662109375, "logps/rejected": -800.2440185546875, "loss": 0.3775, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.6927446126937866, "rewards/margins": 1.8545596599578857, "rewards/rejected": -3.547304630279541, "step": 2490 }, { "epoch": 0.65, "learning_rate": 1.6106951728936028e-06, "logits/chosen": -0.9694339036941528, "logits/rejected": -0.04356659576296806, "logps/chosen": -550.1474609375, "logps/rejected": -714.2723999023438, "loss": 0.3934, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.6208375692367554, "rewards/margins": 1.5635387897491455, "rewards/rejected": -3.1843764781951904, "step": 2500 }, { "epoch": 0.66, "learning_rate": 1.5893821994479996e-06, "logits/chosen": -1.3845258951187134, "logits/rejected": 0.13586857914924622, "logps/chosen": -514.47509765625, "logps/rejected": -733.2557373046875, "loss": 0.4431, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.7695598602294922, "rewards/margins": 1.6184390783309937, "rewards/rejected": -3.3879990577697754, "step": 2510 }, { "epoch": 0.66, "learning_rate": 1.5681452623266868e-06, "logits/chosen": -0.6911166310310364, "logits/rejected": -0.19654271006584167, "logps/chosen": -473.919189453125, "logps/rejected": -720.5162963867188, "loss": 0.4099, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.6180083751678467, "rewards/margins": 1.4277281761169434, "rewards/rejected": -3.04573655128479, "step": 2520 }, { "epoch": 0.66, "learning_rate": 1.5469861348078014e-06, "logits/chosen": -1.3716986179351807, "logits/rejected": -0.06388586759567261, "logps/chosen": -564.1226196289062, "logps/rejected": -798.0968017578125, "loss": 0.4469, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9020678997039795, "rewards/margins": 1.5944623947143555, "rewards/rejected": -3.496530532836914, "step": 2530 }, { "epoch": 0.66, "learning_rate": 1.5259065836724035e-06, "logits/chosen": -0.8546286821365356, "logits/rejected": -0.3107864260673523, "logps/chosen": -550.43017578125, "logps/rejected": -754.4808349609375, "loss": 0.3968, "rewards/accuracies": 0.8125, "rewards/chosen": -2.006619453430176, "rewards/margins": 1.325626015663147, "rewards/rejected": -3.3322455883026123, "step": 2540 }, { "epoch": 0.67, "learning_rate": 1.5049083690569456e-06, "logits/chosen": -1.1054461002349854, "logits/rejected": -0.2836257517337799, "logps/chosen": -498.7848205566406, "logps/rejected": -744.7251586914062, "loss": 0.4576, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7602741718292236, "rewards/margins": 1.6243937015533447, "rewards/rejected": -3.3846676349639893, "step": 2550 }, { "epoch": 0.67, "learning_rate": 1.4839932443063057e-06, "logits/chosen": -1.052145004272461, "logits/rejected": -0.4817884564399719, "logps/chosen": -500.67022705078125, "logps/rejected": -772.7791748046875, "loss": 0.4111, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7564786672592163, "rewards/margins": 1.9239161014556885, "rewards/rejected": -3.6803946495056152, "step": 2560 }, { "epoch": 0.67, "learning_rate": 1.4631629558273803e-06, "logits/chosen": -1.1972812414169312, "logits/rejected": -0.44412803649902344, "logps/chosen": -612.2957153320312, "logps/rejected": -818.8905029296875, "loss": 0.4121, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.9536006450653076, "rewards/margins": 1.6227566003799438, "rewards/rejected": -3.576357364654541, "step": 2570 }, { "epoch": 0.68, "learning_rate": 1.4424192429432657e-06, "logits/chosen": -1.1940516233444214, "logits/rejected": -0.6190989017486572, "logps/chosen": -545.1439208984375, "logps/rejected": -734.8392333984375, "loss": 0.5085, "rewards/accuracies": 0.75, "rewards/chosen": -1.8257439136505127, "rewards/margins": 1.1313966512680054, "rewards/rejected": -2.9571404457092285, "step": 2580 }, { "epoch": 0.68, "learning_rate": 1.421763837748016e-06, "logits/chosen": -0.8670721054077148, "logits/rejected": 0.19919352233409882, "logps/chosen": -536.2199096679688, "logps/rejected": -726.8831787109375, "loss": 0.3427, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.5812265872955322, "rewards/margins": 1.5793380737304688, "rewards/rejected": -3.160564422607422, "step": 2590 }, { "epoch": 0.68, "learning_rate": 1.401198464962021e-06, "logits/chosen": -0.7592190504074097, "logits/rejected": -0.6002682447433472, "logps/chosen": -536.698974609375, "logps/rejected": -750.5850219726562, "loss": 0.4207, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.7439777851104736, "rewards/margins": 1.444570779800415, "rewards/rejected": -3.1885488033294678, "step": 2600 }, { "epoch": 0.68, "learning_rate": 1.3807248417879896e-06, "logits/chosen": -0.9805696606636047, "logits/rejected": -0.1842125803232193, "logps/chosen": -496.516845703125, "logps/rejected": -714.7666015625, "loss": 0.4135, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.775261640548706, "rewards/margins": 1.5725538730621338, "rewards/rejected": -3.347815752029419, "step": 2610 }, { "epoch": 0.69, "learning_rate": 1.3603446777675665e-06, "logits/chosen": -1.047123670578003, "logits/rejected": -0.30356377363204956, "logps/chosen": -510.2223205566406, "logps/rejected": -704.94091796875, "loss": 0.4062, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.7939850091934204, "rewards/margins": 1.3114802837371826, "rewards/rejected": -3.1054649353027344, "step": 2620 }, { "epoch": 0.69, "learning_rate": 1.3400596746385817e-06, "logits/chosen": -1.089388370513916, "logits/rejected": -0.30947428941726685, "logps/chosen": -532.441650390625, "logps/rejected": -787.63720703125, "loss": 0.3845, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.7372934818267822, "rewards/margins": 1.8088710308074951, "rewards/rejected": -3.5461642742156982, "step": 2630 }, { "epoch": 0.69, "learning_rate": 1.3198715261929587e-06, "logits/chosen": -0.878930389881134, "logits/rejected": -0.4860079884529114, "logps/chosen": -504.37701416015625, "logps/rejected": -719.3287963867188, "loss": 0.4429, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.7471263408660889, "rewards/margins": 1.4954102039337158, "rewards/rejected": -3.2425365447998047, "step": 2640 }, { "epoch": 0.69, "learning_rate": 1.2997819181352823e-06, "logits/chosen": -1.1908671855926514, "logits/rejected": 0.06708762049674988, "logps/chosen": -486.3038024902344, "logps/rejected": -774.8641357421875, "loss": 0.3094, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -1.6818081140518188, "rewards/margins": 2.000152587890625, "rewards/rejected": -3.6819605827331543, "step": 2650 }, { "epoch": 0.7, "learning_rate": 1.2797925279420454e-06, "logits/chosen": -0.9907468557357788, "logits/rejected": -0.3420366942882538, "logps/chosen": -546.3676147460938, "logps/rejected": -735.4510498046875, "loss": 0.3966, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.9783893823623657, "rewards/margins": 1.4355647563934326, "rewards/rejected": -3.413954257965088, "step": 2660 }, { "epoch": 0.7, "learning_rate": 1.2599050247215764e-06, "logits/chosen": -1.4948866367340088, "logits/rejected": 0.49219974875450134, "logps/chosen": -573.19140625, "logps/rejected": -743.2158203125, "loss": 0.4396, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.086617946624756, "rewards/margins": 1.3520699739456177, "rewards/rejected": -3.438687801361084, "step": 2670 }, { "epoch": 0.7, "learning_rate": 1.2401210690746705e-06, "logits/chosen": -0.823261559009552, "logits/rejected": -0.15638458728790283, "logps/chosen": -535.7666625976562, "logps/rejected": -808.8283081054688, "loss": 0.4014, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9818382263183594, "rewards/margins": 1.6475715637207031, "rewards/rejected": -3.629409074783325, "step": 2680 }, { "epoch": 0.7, "learning_rate": 1.2204423129559306e-06, "logits/chosen": -0.9111806154251099, "logits/rejected": -0.8399251699447632, "logps/chosen": -543.661865234375, "logps/rejected": -744.8538818359375, "loss": 0.4439, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9010549783706665, "rewards/margins": 1.2256118059158325, "rewards/rejected": -3.126666784286499, "step": 2690 }, { "epoch": 0.71, "learning_rate": 1.20087039953583e-06, "logits/chosen": -1.051735281944275, "logits/rejected": -0.22251495718955994, "logps/chosen": -584.2539672851562, "logps/rejected": -825.45703125, "loss": 0.47, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.9151395559310913, "rewards/margins": 1.5970687866210938, "rewards/rejected": -3.5122084617614746, "step": 2700 }, { "epoch": 0.71, "eval_logits/chosen": 0.5343354940414429, "eval_logits/rejected": 1.5151687860488892, "eval_logps/chosen": -541.308837890625, "eval_logps/rejected": -759.2698974609375, "eval_loss": 0.4210600256919861, "eval_rewards/accuracies": 0.8029999732971191, "eval_rewards/chosen": -1.8564802408218384, "eval_rewards/margins": 1.5461931228637695, "eval_rewards/rejected": -3.4026734828948975, "eval_runtime": 1376.5386, "eval_samples_per_second": 1.453, "eval_steps_per_second": 0.363, "step": 2700 }, { "epoch": 0.71, "learning_rate": 1.181406963063507e-06, "logits/chosen": -1.0936121940612793, "logits/rejected": -0.3421854078769684, "logps/chosen": -551.6010131835938, "logps/rejected": -727.1361083984375, "loss": 0.523, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.9586423635482788, "rewards/margins": 1.3370798826217651, "rewards/rejected": -3.295722484588623, "step": 2710 }, { "epoch": 0.71, "learning_rate": 1.1620536287303052e-06, "logits/chosen": -1.2988653182983398, "logits/rejected": -0.4047287404537201, "logps/chosen": -515.3692626953125, "logps/rejected": -714.9964599609375, "loss": 0.4243, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.6312452554702759, "rewards/margins": 1.321842074394226, "rewards/rejected": -2.953087091445923, "step": 2720 }, { "epoch": 0.71, "learning_rate": 1.1428120125340717e-06, "logits/chosen": -1.1262309551239014, "logits/rejected": 0.1220148354768753, "logps/chosen": -560.1183471679688, "logps/rejected": -755.31103515625, "loss": 0.4397, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7473366260528564, "rewards/margins": 1.4563024044036865, "rewards/rejected": -3.203639268875122, "step": 2730 }, { "epoch": 0.72, "learning_rate": 1.123683721144223e-06, "logits/chosen": -1.1425328254699707, "logits/rejected": -0.3113950788974762, "logps/chosen": -507.19598388671875, "logps/rejected": -742.7738037109375, "loss": 0.3137, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.6102272272109985, "rewards/margins": 1.5865800380706787, "rewards/rejected": -3.196807384490967, "step": 2740 }, { "epoch": 0.72, "learning_rate": 1.1046703517675848e-06, "logits/chosen": -1.2373132705688477, "logits/rejected": -0.09322497248649597, "logps/chosen": -504.95501708984375, "logps/rejected": -680.431396484375, "loss": 0.4333, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.792452096939087, "rewards/margins": 1.1824333667755127, "rewards/rejected": -2.9748852252960205, "step": 2750 }, { "epoch": 0.72, "learning_rate": 1.085773492015028e-06, "logits/chosen": -0.8464914560317993, "logits/rejected": -0.11784086376428604, "logps/chosen": -549.912841796875, "logps/rejected": -770.5538330078125, "loss": 0.4082, "rewards/accuracies": 0.8125, "rewards/chosen": -1.764224648475647, "rewards/margins": 1.4827383756637573, "rewards/rejected": -3.2469630241394043, "step": 2760 }, { "epoch": 0.72, "learning_rate": 1.0669947197689034e-06, "logits/chosen": -0.7767351269721985, "logits/rejected": 0.08518421649932861, "logps/chosen": -487.90399169921875, "logps/rejected": -754.19287109375, "loss": 0.3626, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7886440753936768, "rewards/margins": 1.6782163381576538, "rewards/rejected": -3.466860294342041, "step": 2770 }, { "epoch": 0.73, "learning_rate": 1.048335603051291e-06, "logits/chosen": -0.9023948907852173, "logits/rejected": 0.5675928592681885, "logps/chosen": -573.4138793945312, "logps/rejected": -760.1019897460938, "loss": 0.3304, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9305530786514282, "rewards/margins": 1.7111543416976929, "rewards/rejected": -3.641707181930542, "step": 2780 }, { "epoch": 0.73, "learning_rate": 1.0297976998930665e-06, "logits/chosen": -1.0089476108551025, "logits/rejected": 0.039520103484392166, "logps/chosen": -544.458251953125, "logps/rejected": -781.134521484375, "loss": 0.4407, "rewards/accuracies": 0.875, "rewards/chosen": -1.9217383861541748, "rewards/margins": 1.6432183980941772, "rewards/rejected": -3.5649566650390625, "step": 2790 }, { "epoch": 0.73, "learning_rate": 1.0113825582038078e-06, "logits/chosen": -0.9111200571060181, "logits/rejected": 0.09320324659347534, "logps/chosen": -485.19464111328125, "logps/rejected": -772.6851806640625, "loss": 0.3662, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.6882908344268799, "rewards/margins": 1.9764955043792725, "rewards/rejected": -3.6647861003875732, "step": 2800 }, { "epoch": 0.74, "learning_rate": 9.930917156425477e-07, "logits/chosen": -1.3273289203643799, "logits/rejected": 0.12840789556503296, "logps/chosen": -539.8751220703125, "logps/rejected": -770.5255126953125, "loss": 0.4417, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.8574135303497314, "rewards/margins": 1.6678911447525024, "rewards/rejected": -3.525304079055786, "step": 2810 }, { "epoch": 0.74, "learning_rate": 9.749266994893756e-07, "logits/chosen": -1.2743171453475952, "logits/rejected": -0.29028937220573425, "logps/chosen": -505.72515869140625, "logps/rejected": -708.1859130859375, "loss": 0.3927, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.6994062662124634, "rewards/margins": 1.5197365283966064, "rewards/rejected": -3.2191429138183594, "step": 2820 }, { "epoch": 0.74, "learning_rate": 9.56889026517913e-07, "logits/chosen": -1.2260441780090332, "logits/rejected": 0.2879168689250946, "logps/chosen": -580.123291015625, "logps/rejected": -747.0691528320312, "loss": 0.3703, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0501606464385986, "rewards/margins": 1.359299659729004, "rewards/rejected": -3.4094605445861816, "step": 2830 }, { "epoch": 0.74, "learning_rate": 9.389802028686617e-07, "logits/chosen": -1.1472103595733643, "logits/rejected": 0.685859203338623, "logps/chosen": -564.8762817382812, "logps/rejected": -833.5897216796875, "loss": 0.3696, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9314100742340088, "rewards/margins": 2.114525318145752, "rewards/rejected": -4.04593563079834, "step": 2840 }, { "epoch": 0.75, "learning_rate": 9.212017239232427e-07, "logits/chosen": -1.1055911779403687, "logits/rejected": 0.17128732800483704, "logps/chosen": -461.57757568359375, "logps/rejected": -762.3133544921875, "loss": 0.3902, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.596985101699829, "rewards/margins": 1.919716477394104, "rewards/rejected": -3.5167019367218018, "step": 2850 }, { "epoch": 0.75, "learning_rate": 9.03555074179533e-07, "logits/chosen": -0.5496357679367065, "logits/rejected": -0.5428146123886108, "logps/chosen": -548.8982543945312, "logps/rejected": -794.3575439453125, "loss": 0.4303, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.8840789794921875, "rewards/margins": 1.6221778392791748, "rewards/rejected": -3.506256580352783, "step": 2860 }, { "epoch": 0.75, "learning_rate": 8.860417271277067e-07, "logits/chosen": -1.2351821660995483, "logits/rejected": -0.3069414794445038, "logps/chosen": -512.6089477539062, "logps/rejected": -765.6232299804688, "loss": 0.3669, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.8200585842132568, "rewards/margins": 1.858338713645935, "rewards/rejected": -3.6783974170684814, "step": 2870 }, { "epoch": 0.75, "learning_rate": 8.686631451272029e-07, "logits/chosen": -0.9176104664802551, "logits/rejected": 0.11124134063720703, "logps/chosen": -530.0169067382812, "logps/rejected": -772.3027954101562, "loss": 0.3666, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.805767297744751, "rewards/margins": 1.8668705224990845, "rewards/rejected": -3.672637462615967, "step": 2880 }, { "epoch": 0.76, "learning_rate": 8.514207792846168e-07, "logits/chosen": -0.8543869853019714, "logits/rejected": -0.33468276262283325, "logps/chosen": -563.6397705078125, "logps/rejected": -726.9634399414062, "loss": 0.3891, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.042240619659424, "rewards/margins": 1.2924143075942993, "rewards/rejected": -3.3346545696258545, "step": 2890 }, { "epoch": 0.76, "learning_rate": 8.343160693325356e-07, "logits/chosen": -0.5689171552658081, "logits/rejected": -0.3701861500740051, "logps/chosen": -522.55078125, "logps/rejected": -799.8737182617188, "loss": 0.3666, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.9046812057495117, "rewards/margins": 1.847630262374878, "rewards/rejected": -3.7523112297058105, "step": 2900 }, { "epoch": 0.76, "learning_rate": 8.173504435093174e-07, "logits/chosen": -1.1127324104309082, "logits/rejected": 0.49559181928634644, "logps/chosen": -580.339599609375, "logps/rejected": -790.05419921875, "loss": 0.4036, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -2.1893162727355957, "rewards/margins": 1.816868543624878, "rewards/rejected": -4.0061845779418945, "step": 2910 }, { "epoch": 0.76, "learning_rate": 8.00525318439836e-07, "logits/chosen": -1.1705108880996704, "logits/rejected": 0.1877966821193695, "logps/chosen": -577.6532592773438, "logps/rejected": -775.518310546875, "loss": 0.5292, "rewards/accuracies": 0.8125, "rewards/chosen": -2.123340129852295, "rewards/margins": 1.601309061050415, "rewards/rejected": -3.724648952484131, "step": 2920 }, { "epoch": 0.77, "learning_rate": 7.838420990171927e-07, "logits/chosen": -0.675153374671936, "logits/rejected": -0.15449100732803345, "logps/chosen": -529.3169555664062, "logps/rejected": -849.1887817382812, "loss": 0.3346, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -1.992500901222229, "rewards/margins": 2.1089320182800293, "rewards/rejected": -4.101432800292969, "step": 2930 }, { "epoch": 0.77, "learning_rate": 7.673021782854084e-07, "logits/chosen": -1.0276451110839844, "logits/rejected": 0.12752141058444977, "logps/chosen": -569.2623901367188, "logps/rejected": -797.4970703125, "loss": 0.4235, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.9917293787002563, "rewards/margins": 1.7630789279937744, "rewards/rejected": -3.7548089027404785, "step": 2940 }, { "epoch": 0.77, "learning_rate": 7.509069373231039e-07, "logits/chosen": -0.532359778881073, "logits/rejected": -0.37800487875938416, "logps/chosen": -572.4589233398438, "logps/rejected": -849.5089111328125, "loss": 0.4203, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -2.19490385055542, "rewards/margins": 1.7656660079956055, "rewards/rejected": -3.9605698585510254, "step": 2950 }, { "epoch": 0.77, "learning_rate": 7.346577451281822e-07, "logits/chosen": -0.9249979853630066, "logits/rejected": 0.5974918603897095, "logps/chosen": -583.9189453125, "logps/rejected": -770.7642822265625, "loss": 0.4452, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.059628486633301, "rewards/margins": 1.6830648183822632, "rewards/rejected": -3.7426934242248535, "step": 2960 }, { "epoch": 0.78, "learning_rate": 7.185559585035138e-07, "logits/chosen": -0.7646081447601318, "logits/rejected": -0.08741030842065811, "logps/chosen": -494.5181579589844, "logps/rejected": -819.3825073242188, "loss": 0.4152, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.7815096378326416, "rewards/margins": 2.074185848236084, "rewards/rejected": -3.8556952476501465, "step": 2970 }, { "epoch": 0.78, "learning_rate": 7.026029219436504e-07, "logits/chosen": -1.0040438175201416, "logits/rejected": -0.2370959222316742, "logps/chosen": -547.5843505859375, "logps/rejected": -775.7852783203125, "loss": 0.3377, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8116861581802368, "rewards/margins": 1.7394940853118896, "rewards/rejected": -3.551180362701416, "step": 2980 }, { "epoch": 0.78, "learning_rate": 6.867999675225523e-07, "logits/chosen": -0.8310391306877136, "logits/rejected": -0.02760641649365425, "logps/chosen": -590.3953857421875, "logps/rejected": -830.1414184570312, "loss": 0.3744, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.0531251430511475, "rewards/margins": 1.790724515914917, "rewards/rejected": -3.843848705291748, "step": 2990 }, { "epoch": 0.79, "learning_rate": 6.711484147823663e-07, "logits/chosen": -1.0045768022537231, "logits/rejected": -0.32932716608047485, "logps/chosen": -519.6864013671875, "logps/rejected": -840.6529541015625, "loss": 0.3769, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.8191581964492798, "rewards/margins": 1.9201765060424805, "rewards/rejected": -3.7393341064453125, "step": 3000 }, { "epoch": 0.79, "eval_logits/chosen": 0.532638430595398, "eval_logits/rejected": 1.514159917831421, "eval_logps/chosen": -547.6463012695312, "eval_logps/rejected": -772.1762084960938, "eval_loss": 0.4205494523048401, "eval_rewards/accuracies": 0.8009999990463257, "eval_rewards/chosen": -1.919854760169983, "eval_rewards/margins": 1.6118818521499634, "eval_rewards/rejected": -3.5317368507385254, "eval_runtime": 1383.4868, "eval_samples_per_second": 1.446, "eval_steps_per_second": 0.361, "step": 3000 }, { "epoch": 0.79, "learning_rate": 6.556495706232413e-07, "logits/chosen": -1.1887633800506592, "logits/rejected": 0.10280628502368927, "logps/chosen": -592.3970947265625, "logps/rejected": -802.2684326171875, "loss": 0.4594, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.981632947921753, "rewards/margins": 1.690386414527893, "rewards/rejected": -3.6720194816589355, "step": 3010 }, { "epoch": 0.79, "learning_rate": 6.403047291942057e-07, "logits/chosen": -1.0731422901153564, "logits/rejected": -0.11721036583185196, "logps/chosen": -500.95733642578125, "logps/rejected": -810.4306030273438, "loss": 0.3202, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -1.6846420764923096, "rewards/margins": 2.057126522064209, "rewards/rejected": -3.7417690753936768, "step": 3020 }, { "epoch": 0.79, "learning_rate": 6.251151717851023e-07, "logits/chosen": -0.5943460464477539, "logits/rejected": -0.3044959008693695, "logps/chosen": -552.7174682617188, "logps/rejected": -838.6080322265625, "loss": 0.3992, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.064213275909424, "rewards/margins": 1.780678153038025, "rewards/rejected": -3.844891309738159, "step": 3030 }, { "epoch": 0.8, "learning_rate": 6.100821667196041e-07, "logits/chosen": -0.9400644302368164, "logits/rejected": -0.20678548514842987, "logps/chosen": -539.8009033203125, "logps/rejected": -764.1534423828125, "loss": 0.3418, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.653412103652954, "rewards/margins": 1.8391025066375732, "rewards/rejected": -3.4925143718719482, "step": 3040 }, { "epoch": 0.8, "learning_rate": 5.952069692493062e-07, "logits/chosen": -0.7938990592956543, "logits/rejected": -0.40649691224098206, "logps/chosen": -596.44873046875, "logps/rejected": -842.193359375, "loss": 0.4439, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.0670700073242188, "rewards/margins": 1.7952619791030884, "rewards/rejected": -3.8623321056365967, "step": 3050 }, { "epoch": 0.8, "learning_rate": 5.80490821448918e-07, "logits/chosen": -0.9244669079780579, "logits/rejected": -0.5335060358047485, "logps/chosen": -596.2068481445312, "logps/rejected": -874.0540161132812, "loss": 0.391, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -2.2196078300476074, "rewards/margins": 1.8115314245224, "rewards/rejected": -4.031139373779297, "step": 3060 }, { "epoch": 0.8, "learning_rate": 5.659349521125459e-07, "logits/chosen": -0.6347008347511292, "logits/rejected": -0.5196251273155212, "logps/chosen": -535.7493286132812, "logps/rejected": -856.0029296875, "loss": 0.4562, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9471817016601562, "rewards/margins": 1.7889111042022705, "rewards/rejected": -3.7360928058624268, "step": 3070 }, { "epoch": 0.81, "learning_rate": 5.5154057665109e-07, "logits/chosen": -1.1485843658447266, "logits/rejected": 0.004442277364432812, "logps/chosen": -522.5572509765625, "logps/rejected": -784.026123046875, "loss": 0.3419, "rewards/accuracies": 0.875, "rewards/chosen": -1.899583101272583, "rewards/margins": 1.6547266244888306, "rewards/rejected": -3.554309844970703, "step": 3080 }, { "epoch": 0.81, "learning_rate": 5.373088969907586e-07, "logits/chosen": -0.7392430901527405, "logits/rejected": -0.4351174235343933, "logps/chosen": -475.52294921875, "logps/rejected": -764.8416748046875, "loss": 0.3467, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -1.6972386837005615, "rewards/margins": 1.8677946329116821, "rewards/rejected": -3.565033435821533, "step": 3090 }, { "epoch": 0.81, "learning_rate": 5.23241101472709e-07, "logits/chosen": -0.841932475566864, "logits/rejected": -0.11554646492004395, "logps/chosen": -619.42724609375, "logps/rejected": -848.4366455078125, "loss": 0.3589, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.211543560028076, "rewards/margins": 1.8801252841949463, "rewards/rejected": -4.091668605804443, "step": 3100 }, { "epoch": 0.81, "learning_rate": 5.09338364753818e-07, "logits/chosen": -1.0390411615371704, "logits/rejected": 0.6153230667114258, "logps/chosen": -606.2908325195312, "logps/rejected": -770.7852783203125, "loss": 0.5322, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.2032546997070312, "rewards/margins": 1.4495493173599243, "rewards/rejected": -3.652804136276245, "step": 3110 }, { "epoch": 0.82, "learning_rate": 4.956018477086005e-07, "logits/chosen": -0.8461894989013672, "logits/rejected": -0.3413206934928894, "logps/chosen": -537.2196655273438, "logps/rejected": -825.6803588867188, "loss": 0.3784, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.8306541442871094, "rewards/margins": 1.9641211032867432, "rewards/rejected": -3.7947754859924316, "step": 3120 }, { "epoch": 0.82, "learning_rate": 4.820326973322764e-07, "logits/chosen": -0.8142082095146179, "logits/rejected": -0.10756425559520721, "logps/chosen": -565.7686157226562, "logps/rejected": -817.3389892578125, "loss": 0.4628, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.146563768386841, "rewards/margins": 1.795789122581482, "rewards/rejected": -3.9423530101776123, "step": 3130 }, { "epoch": 0.82, "learning_rate": 4.686320466449981e-07, "logits/chosen": -0.9902013540267944, "logits/rejected": -0.15123017132282257, "logps/chosen": -511.6470642089844, "logps/rejected": -815.7200317382812, "loss": 0.3672, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -1.8265291452407837, "rewards/margins": 2.014096975326538, "rewards/rejected": -3.8406262397766113, "step": 3140 }, { "epoch": 0.82, "learning_rate": 4.554010145972418e-07, "logits/chosen": -0.8389061689376831, "logits/rejected": -0.14444035291671753, "logps/chosen": -551.7734375, "logps/rejected": -798.772705078125, "loss": 0.4095, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.063991069793701, "rewards/margins": 1.5325976610183716, "rewards/rejected": -3.596588611602783, "step": 3150 }, { "epoch": 0.83, "learning_rate": 4.4234070597637455e-07, "logits/chosen": -0.8169253468513489, "logits/rejected": 0.03178207948803902, "logps/chosen": -529.4456787109375, "logps/rejected": -754.3330078125, "loss": 0.3978, "rewards/accuracies": 0.8125, "rewards/chosen": -2.110259771347046, "rewards/margins": 1.4216582775115967, "rewards/rejected": -3.5319180488586426, "step": 3160 }, { "epoch": 0.83, "learning_rate": 4.2945221131440783e-07, "logits/chosen": -0.7767388224601746, "logits/rejected": -0.2466239631175995, "logps/chosen": -586.1234130859375, "logps/rejected": -792.2913818359375, "loss": 0.4286, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -2.040156602859497, "rewards/margins": 1.661505937576294, "rewards/rejected": -3.701662063598633, "step": 3170 }, { "epoch": 0.83, "learning_rate": 4.167366067969381e-07, "logits/chosen": -0.8122004270553589, "logits/rejected": -0.4158555567264557, "logps/chosen": -579.0358276367188, "logps/rejected": -846.17626953125, "loss": 0.3993, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.079592227935791, "rewards/margins": 1.6793367862701416, "rewards/rejected": -3.758929491043091, "step": 3180 }, { "epoch": 0.83, "learning_rate": 4.041949541732826e-07, "logits/chosen": -1.0485798120498657, "logits/rejected": -0.36888834834098816, "logps/chosen": -630.3906860351562, "logps/rejected": -814.126953125, "loss": 0.4027, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -2.191230535507202, "rewards/margins": 1.4397324323654175, "rewards/rejected": -3.63096284866333, "step": 3190 }, { "epoch": 0.84, "learning_rate": 3.9182830066782614e-07, "logits/chosen": -0.8572107553482056, "logits/rejected": -0.33156412839889526, "logps/chosen": -506.94781494140625, "logps/rejected": -776.4071044921875, "loss": 0.348, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -1.8441927433013916, "rewards/margins": 1.8142344951629639, "rewards/rejected": -3.6584274768829346, "step": 3200 }, { "epoch": 0.84, "learning_rate": 3.796376788925771e-07, "logits/chosen": -1.5727512836456299, "logits/rejected": 0.38068026304244995, "logps/chosen": -552.1102905273438, "logps/rejected": -777.140869140625, "loss": 0.4251, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.8679554462432861, "rewards/margins": 1.6639436483383179, "rewards/rejected": -3.5318992137908936, "step": 3210 }, { "epoch": 0.84, "learning_rate": 3.676241067609465e-07, "logits/chosen": -0.8122023344039917, "logits/rejected": 0.24542848765850067, "logps/chosen": -533.0785522460938, "logps/rejected": -790.8440551757812, "loss": 0.3919, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8818871974945068, "rewards/margins": 1.9045559167861938, "rewards/rejected": -3.7864432334899902, "step": 3220 }, { "epoch": 0.85, "learning_rate": 3.5578858740274976e-07, "logits/chosen": -1.0608148574829102, "logits/rejected": -0.1883460134267807, "logps/chosen": -546.2855224609375, "logps/rejected": -784.9697265625, "loss": 0.425, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.9488483667373657, "rewards/margins": 1.45414400100708, "rewards/rejected": -3.4029927253723145, "step": 3230 }, { "epoch": 0.85, "learning_rate": 3.44132109080447e-07, "logits/chosen": -1.0673831701278687, "logits/rejected": -0.0653887614607811, "logps/chosen": -512.1033935546875, "logps/rejected": -733.269287109375, "loss": 0.4068, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.7813838720321655, "rewards/margins": 1.5279889106750488, "rewards/rejected": -3.309372663497925, "step": 3240 }, { "epoch": 0.85, "learning_rate": 3.3265564510662344e-07, "logits/chosen": -1.1314384937286377, "logits/rejected": -0.5283291935920715, "logps/chosen": -552.1278076171875, "logps/rejected": -818.47509765625, "loss": 0.4451, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8823707103729248, "rewards/margins": 1.676279067993164, "rewards/rejected": -3.558649778366089, "step": 3250 }, { "epoch": 0.85, "learning_rate": 3.213601537627195e-07, "logits/chosen": -0.9113371968269348, "logits/rejected": -0.332157701253891, "logps/chosen": -565.0186767578125, "logps/rejected": -805.5143432617188, "loss": 0.4066, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2068862915039062, "rewards/margins": 1.6060466766357422, "rewards/rejected": -3.8129334449768066, "step": 3260 }, { "epoch": 0.86, "learning_rate": 3.1024657821901063e-07, "logits/chosen": -0.7972087264060974, "logits/rejected": -0.6192452311515808, "logps/chosen": -460.00244140625, "logps/rejected": -661.6898193359375, "loss": 0.4204, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.722604513168335, "rewards/margins": 1.273694396018982, "rewards/rejected": -2.9962992668151855, "step": 3270 }, { "epoch": 0.86, "learning_rate": 2.9931584645585654e-07, "logits/chosen": -1.2535960674285889, "logits/rejected": 0.02778279222548008, "logps/chosen": -465.5445861816406, "logps/rejected": -718.2966918945312, "loss": 0.3941, "rewards/accuracies": 0.8125, "rewards/chosen": -1.620823860168457, "rewards/margins": 1.7875875234603882, "rewards/rejected": -3.4084110260009766, "step": 3280 }, { "epoch": 0.86, "learning_rate": 2.885688711862136e-07, "logits/chosen": -0.6595426201820374, "logits/rejected": -0.6082831621170044, "logps/chosen": -518.3923950195312, "logps/rejected": -844.9713745117188, "loss": 0.3628, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.020922899246216, "rewards/margins": 2.008241653442383, "rewards/rejected": -4.0291643142700195, "step": 3290 }, { "epoch": 0.86, "learning_rate": 2.7800654977942486e-07, "logits/chosen": -1.1551368236541748, "logits/rejected": 0.04624384641647339, "logps/chosen": -494.8173828125, "logps/rejected": -759.0722045898438, "loss": 0.3921, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.8496835231781006, "rewards/margins": 1.606042504310608, "rewards/rejected": -3.455725908279419, "step": 3300 }, { "epoch": 0.86, "eval_logits/chosen": 0.5531209707260132, "eval_logits/rejected": 1.5286740064620972, "eval_logps/chosen": -559.9616088867188, "eval_logps/rejected": -791.3992309570312, "eval_loss": 0.4215858280658722, "eval_rewards/accuracies": 0.8050000071525574, "eval_rewards/chosen": -2.043008327484131, "eval_rewards/margins": 1.680959939956665, "eval_rewards/rejected": -3.723968267440796, "eval_runtime": 1375.3072, "eval_samples_per_second": 1.454, "eval_steps_per_second": 0.364, "step": 3300 }, { "epoch": 0.87, "learning_rate": 2.6762976418628797e-07, "logits/chosen": -0.6151852011680603, "logits/rejected": -0.4355131983757019, "logps/chosen": -569.3573608398438, "logps/rejected": -845.0255737304688, "loss": 0.3994, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9695743322372437, "rewards/margins": 1.774019479751587, "rewards/rejected": -3.743593692779541, "step": 3310 }, { "epoch": 0.87, "learning_rate": 2.5743938086541354e-07, "logits/chosen": -1.1306208372116089, "logits/rejected": -0.369152694940567, "logps/chosen": -528.9949951171875, "logps/rejected": -786.5325927734375, "loss": 0.4035, "rewards/accuracies": 0.875, "rewards/chosen": -1.8521554470062256, "rewards/margins": 2.019233226776123, "rewards/rejected": -3.8713886737823486, "step": 3320 }, { "epoch": 0.87, "learning_rate": 2.4743625071087574e-07, "logits/chosen": -0.9898750185966492, "logits/rejected": -0.0026702166069298983, "logps/chosen": -589.635498046875, "logps/rejected": -785.4796752929688, "loss": 0.481, "rewards/accuracies": 0.8125, "rewards/chosen": -2.26185941696167, "rewards/margins": 1.3372209072113037, "rewards/rejected": -3.5990803241729736, "step": 3330 }, { "epoch": 0.87, "learning_rate": 2.3762120898116498e-07, "logits/chosen": -1.0256075859069824, "logits/rejected": -0.4684371054172516, "logps/chosen": -513.4962158203125, "logps/rejected": -773.9256591796875, "loss": 0.489, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8556444644927979, "rewards/margins": 1.662695288658142, "rewards/rejected": -3.5183398723602295, "step": 3340 }, { "epoch": 0.88, "learning_rate": 2.2799507522944048e-07, "logits/chosen": -1.2172832489013672, "logits/rejected": -0.7886725068092346, "logps/chosen": -603.7682495117188, "logps/rejected": -839.2596435546875, "loss": 0.3825, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.133934259414673, "rewards/margins": 1.56403386592865, "rewards/rejected": -3.6979682445526123, "step": 3350 }, { "epoch": 0.88, "learning_rate": 2.1855865323510056e-07, "logits/chosen": -0.9375013113021851, "logits/rejected": -0.4790850281715393, "logps/chosen": -529.7852783203125, "logps/rejected": -789.4864501953125, "loss": 0.3798, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.7933248281478882, "rewards/margins": 1.7892353534698486, "rewards/rejected": -3.5825603008270264, "step": 3360 }, { "epoch": 0.88, "learning_rate": 2.0931273093666575e-07, "logits/chosen": -1.0546633005142212, "logits/rejected": 0.11733438819646835, "logps/chosen": -566.7720947265625, "logps/rejected": -769.4909057617188, "loss": 0.4561, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.1270699501037598, "rewards/margins": 1.5025631189346313, "rewards/rejected": -3.6296334266662598, "step": 3370 }, { "epoch": 0.88, "learning_rate": 2.002580803659873e-07, "logits/chosen": -1.0718940496444702, "logits/rejected": -0.24651813507080078, "logps/chosen": -563.5213012695312, "logps/rejected": -746.7576904296875, "loss": 0.4189, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.0106587409973145, "rewards/margins": 1.5113112926483154, "rewards/rejected": -3.52197003364563, "step": 3380 }, { "epoch": 0.89, "learning_rate": 1.913954575837826e-07, "logits/chosen": -1.211354374885559, "logits/rejected": -0.7178353071212769, "logps/chosen": -547.396728515625, "logps/rejected": -785.0447998046875, "loss": 0.3956, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.859985113143921, "rewards/margins": 1.6019798517227173, "rewards/rejected": -3.4619648456573486, "step": 3390 }, { "epoch": 0.89, "learning_rate": 1.827256026165028e-07, "logits/chosen": -1.3360965251922607, "logits/rejected": -0.5832281112670898, "logps/chosen": -572.5958251953125, "logps/rejected": -827.7864379882812, "loss": 0.4079, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.8811748027801514, "rewards/margins": 1.826063871383667, "rewards/rejected": -3.7072386741638184, "step": 3400 }, { "epoch": 0.89, "learning_rate": 1.7424923939454274e-07, "logits/chosen": -1.0015101432800293, "logits/rejected": -0.4501993656158447, "logps/chosen": -516.7034912109375, "logps/rejected": -737.7506713867188, "loss": 0.3917, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.871683120727539, "rewards/margins": 1.2368314266204834, "rewards/rejected": -3.1085145473480225, "step": 3410 }, { "epoch": 0.9, "learning_rate": 1.6596707569179304e-07, "logits/chosen": -1.220655918121338, "logits/rejected": -0.36421042680740356, "logps/chosen": -664.6968994140625, "logps/rejected": -811.2049560546875, "loss": 0.5067, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.4624738693237305, "rewards/margins": 1.2412056922912598, "rewards/rejected": -3.7036795616149902, "step": 3420 }, { "epoch": 0.9, "learning_rate": 1.578798030665385e-07, "logits/chosen": -0.7742003202438354, "logits/rejected": -0.1663471907377243, "logps/chosen": -584.2454833984375, "logps/rejected": -789.2788696289062, "loss": 0.4325, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.159472703933716, "rewards/margins": 1.4603602886199951, "rewards/rejected": -3.619832992553711, "step": 3430 }, { "epoch": 0.9, "learning_rate": 1.499880968037165e-07, "logits/chosen": -0.9450448155403137, "logits/rejected": -0.24974000453948975, "logps/chosen": -543.8734130859375, "logps/rejected": -776.0538330078125, "loss": 0.3894, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.0840260982513428, "rewards/margins": 1.4753036499023438, "rewards/rejected": -3.5593299865722656, "step": 3440 }, { "epoch": 0.9, "learning_rate": 1.4229261585852805e-07, "logits/chosen": -1.1183446645736694, "logits/rejected": -0.8927680253982544, "logps/chosen": -516.3631591796875, "logps/rejected": -750.56884765625, "loss": 0.4428, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.7409288883209229, "rewards/margins": 1.517566442489624, "rewards/rejected": -3.258495330810547, "step": 3450 }, { "epoch": 0.91, "learning_rate": 1.3479400280141886e-07, "logits/chosen": -1.0318033695220947, "logits/rejected": -0.8086441159248352, "logps/chosen": -516.8532104492188, "logps/rejected": -741.1300659179688, "loss": 0.3884, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.8979904651641846, "rewards/margins": 1.477073311805725, "rewards/rejected": -3.37506365776062, "step": 3460 }, { "epoch": 0.91, "learning_rate": 1.2749288376442044e-07, "logits/chosen": -0.9179407358169556, "logits/rejected": -0.33625102043151855, "logps/chosen": -536.0686645507812, "logps/rejected": -847.4317626953125, "loss": 0.3721, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.9626020193099976, "rewards/margins": 1.956992745399475, "rewards/rejected": -3.9195950031280518, "step": 3470 }, { "epoch": 0.91, "learning_rate": 1.203898683888713e-07, "logits/chosen": -1.2370408773422241, "logits/rejected": -0.602063775062561, "logps/chosen": -547.3104858398438, "logps/rejected": -757.8153076171875, "loss": 0.4135, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.9618017673492432, "rewards/margins": 1.5197532176971436, "rewards/rejected": -3.4815547466278076, "step": 3480 }, { "epoch": 0.91, "learning_rate": 1.1348554977451132e-07, "logits/chosen": -0.8971865773200989, "logits/rejected": -0.45237869024276733, "logps/chosen": -549.7821044921875, "logps/rejected": -819.00439453125, "loss": 0.422, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.9521732330322266, "rewards/margins": 1.8261715173721313, "rewards/rejected": -3.7783446311950684, "step": 3490 }, { "epoch": 0.92, "learning_rate": 1.0678050442995802e-07, "logits/chosen": -0.5732991099357605, "logits/rejected": -0.1439836025238037, "logps/chosen": -517.4790649414062, "logps/rejected": -790.7830810546875, "loss": 0.3695, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.7863785028457642, "rewards/margins": 1.7641347646713257, "rewards/rejected": -3.550513505935669, "step": 3500 }, { "epoch": 0.92, "learning_rate": 1.0027529222456755e-07, "logits/chosen": -0.8079764246940613, "logits/rejected": -0.43154460191726685, "logps/chosen": -505.2015075683594, "logps/rejected": -735.8001708984375, "loss": 0.4543, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.861318588256836, "rewards/margins": 1.397134780883789, "rewards/rejected": -3.258453369140625, "step": 3510 }, { "epoch": 0.92, "learning_rate": 9.397045634168766e-08, "logits/chosen": -1.5205557346343994, "logits/rejected": -0.21061238646507263, "logps/chosen": -541.4683837890625, "logps/rejected": -778.14453125, "loss": 0.3821, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8572975397109985, "rewards/margins": 1.6669906377792358, "rewards/rejected": -3.5242881774902344, "step": 3520 }, { "epoch": 0.92, "learning_rate": 8.78665232332998e-08, "logits/chosen": -1.0609691143035889, "logits/rejected": -0.4593663811683655, "logps/chosen": -509.010986328125, "logps/rejected": -796.3077392578125, "loss": 0.363, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.9296966791152954, "rewards/margins": 1.6614776849746704, "rewards/rejected": -3.591174364089966, "step": 3530 }, { "epoch": 0.93, "learning_rate": 8.196400257606208e-08, "logits/chosen": -0.8713966608047485, "logits/rejected": -0.9145506024360657, "logps/chosen": -527.7672729492188, "logps/rejected": -777.5648193359375, "loss": 0.4802, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.896409273147583, "rewards/margins": 1.6549888849258423, "rewards/rejected": -3.551398515701294, "step": 3540 }, { "epoch": 0.93, "learning_rate": 7.626338722875076e-08, "logits/chosen": -1.1116408109664917, "logits/rejected": -0.13830144703388214, "logps/chosen": -569.3795166015625, "logps/rejected": -845.25830078125, "loss": 0.3774, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.9394264221191406, "rewards/margins": 2.049816131591797, "rewards/rejected": -3.9892425537109375, "step": 3550 }, { "epoch": 0.93, "learning_rate": 7.076515319110688e-08, "logits/chosen": -0.9675251841545105, "logits/rejected": -0.38351327180862427, "logps/chosen": -519.7999267578125, "logps/rejected": -768.5065307617188, "loss": 0.3933, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.9502222537994385, "rewards/margins": 1.693610429763794, "rewards/rejected": -3.6438324451446533, "step": 3560 }, { "epoch": 0.93, "learning_rate": 6.54697595640899e-08, "logits/chosen": -0.8377977609634399, "logits/rejected": -0.5262192487716675, "logps/chosen": -530.46728515625, "logps/rejected": -769.6790771484375, "loss": 0.4231, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.032832622528076, "rewards/margins": 1.6006113290786743, "rewards/rejected": -3.633444309234619, "step": 3570 }, { "epoch": 0.94, "learning_rate": 6.037764851154426e-08, "logits/chosen": -0.985633373260498, "logits/rejected": 0.16631217300891876, "logps/chosen": -595.2657470703125, "logps/rejected": -853.1846923828125, "loss": 0.3474, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.9297056198120117, "rewards/margins": 1.8593957424163818, "rewards/rejected": -3.7891018390655518, "step": 3580 }, { "epoch": 0.94, "learning_rate": 5.548924522327748e-08, "logits/chosen": -1.016361951828003, "logits/rejected": -0.7056697607040405, "logps/chosen": -516.3272705078125, "logps/rejected": -732.7681274414062, "loss": 0.4643, "rewards/accuracies": 0.75, "rewards/chosen": -1.8100614547729492, "rewards/margins": 1.4326629638671875, "rewards/rejected": -3.242724657058716, "step": 3590 }, { "epoch": 0.94, "learning_rate": 5.0804957879556915e-08, "logits/chosen": -1.4801714420318604, "logits/rejected": -0.21010088920593262, "logps/chosen": -538.2376708984375, "logps/rejected": -716.570556640625, "loss": 0.4249, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.8412452936172485, "rewards/margins": 1.4520995616912842, "rewards/rejected": -3.2933449745178223, "step": 3600 }, { "epoch": 0.94, "eval_logits/chosen": 0.3916805386543274, "eval_logits/rejected": 1.3532880544662476, "eval_logps/chosen": -551.5703735351562, "eval_logps/rejected": -777.8283081054688, "eval_loss": 0.4203905165195465, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": -1.9590957164764404, "eval_rewards/margins": 1.6291632652282715, "eval_rewards/rejected": -3.588258981704712, "eval_runtime": 1380.3242, "eval_samples_per_second": 1.449, "eval_steps_per_second": 0.362, "step": 3600 }, { "epoch": 0.94, "learning_rate": 4.632517761702815e-08, "logits/chosen": -1.3528892993927002, "logits/rejected": 0.41183796525001526, "logps/chosen": -596.6740112304688, "logps/rejected": -815.2229614257812, "loss": 0.3384, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.033761739730835, "rewards/margins": 1.7981764078140259, "rewards/rejected": -3.8319382667541504, "step": 3610 }, { "epoch": 0.95, "learning_rate": 4.205027849605359e-08, "logits/chosen": -0.6885486245155334, "logits/rejected": -0.6011554002761841, "logps/chosen": -501.90008544921875, "logps/rejected": -728.01416015625, "loss": 0.3797, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8948495388031006, "rewards/margins": 1.430154800415039, "rewards/rejected": -3.3250045776367188, "step": 3620 }, { "epoch": 0.95, "learning_rate": 3.798061746947995e-08, "logits/chosen": -1.1346218585968018, "logits/rejected": -0.2788551449775696, "logps/chosen": -572.7951049804688, "logps/rejected": -818.1143798828125, "loss": 0.3816, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.859471321105957, "rewards/margins": 1.9316068887710571, "rewards/rejected": -3.7910780906677246, "step": 3630 }, { "epoch": 0.95, "learning_rate": 3.411653435283158e-08, "logits/chosen": -1.2903286218643188, "logits/rejected": -0.4007846713066101, "logps/chosen": -612.4331665039062, "logps/rejected": -790.9478759765625, "loss": 0.4783, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.9661962985992432, "rewards/margins": 1.4086124897003174, "rewards/rejected": -3.3748087882995605, "step": 3640 }, { "epoch": 0.96, "learning_rate": 3.04583517959367e-08, "logits/chosen": -1.4373764991760254, "logits/rejected": -0.18271857500076294, "logps/chosen": -567.571533203125, "logps/rejected": -814.9035034179688, "loss": 0.372, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9404630661010742, "rewards/margins": 1.8180534839630127, "rewards/rejected": -3.758516788482666, "step": 3650 }, { "epoch": 0.96, "learning_rate": 2.7006375255985984e-08, "logits/chosen": -0.8399646878242493, "logits/rejected": -0.41886812448501587, "logps/chosen": -487.2755432128906, "logps/rejected": -792.0075073242188, "loss": 0.3617, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.7799431085586548, "rewards/margins": 2.1151885986328125, "rewards/rejected": -3.895132064819336, "step": 3660 }, { "epoch": 0.96, "learning_rate": 2.3760892972027328e-08, "logits/chosen": -0.7552706003189087, "logits/rejected": -0.37330105900764465, "logps/chosen": -550.1450805664062, "logps/rejected": -821.5558471679688, "loss": 0.4513, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -2.0513758659362793, "rewards/margins": 1.6616461277008057, "rewards/rejected": -3.713021755218506, "step": 3670 }, { "epoch": 0.96, "learning_rate": 2.072217594089765e-08, "logits/chosen": -1.048200249671936, "logits/rejected": -0.7420127987861633, "logps/chosen": -548.27587890625, "logps/rejected": -818.7357788085938, "loss": 0.4605, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.8455013036727905, "rewards/margins": 1.6794688701629639, "rewards/rejected": -3.524970293045044, "step": 3680 }, { "epoch": 0.97, "learning_rate": 1.789047789459375e-08, "logits/chosen": -0.8459684252738953, "logits/rejected": -0.16287431120872498, "logps/chosen": -508.4056091308594, "logps/rejected": -766.1015625, "loss": 0.3671, "rewards/accuracies": 0.875, "rewards/chosen": -1.6832281351089478, "rewards/margins": 1.7482646703720093, "rewards/rejected": -3.431492567062378, "step": 3690 }, { "epoch": 0.97, "learning_rate": 1.5266035279088708e-08, "logits/chosen": -0.6956599354743958, "logits/rejected": -0.6482317447662354, "logps/chosen": -574.1497802734375, "logps/rejected": -854.5062255859375, "loss": 0.3785, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.2222962379455566, "rewards/margins": 1.6956876516342163, "rewards/rejected": -3.9179844856262207, "step": 3700 }, { "epoch": 0.97, "learning_rate": 1.2849067234584623e-08, "logits/chosen": -0.6731444001197815, "logits/rejected": -0.2936319410800934, "logps/chosen": -508.5874938964844, "logps/rejected": -779.5755004882812, "loss": 0.3793, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -1.7908990383148193, "rewards/margins": 1.8119395971298218, "rewards/rejected": -3.6028385162353516, "step": 3710 }, { "epoch": 0.97, "learning_rate": 1.0639775577218625e-08, "logits/chosen": -1.1039248704910278, "logits/rejected": -0.3895350396633148, "logps/chosen": -532.2550048828125, "logps/rejected": -799.4228515625, "loss": 0.3993, "rewards/accuracies": 0.8125, "rewards/chosen": -1.914385437965393, "rewards/margins": 1.754346489906311, "rewards/rejected": -3.668731689453125, "step": 3720 }, { "epoch": 0.98, "learning_rate": 8.638344782207486e-09, "logits/chosen": -1.1005773544311523, "logits/rejected": -0.48734521865844727, "logps/chosen": -638.8570556640625, "logps/rejected": -876.8406372070312, "loss": 0.441, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.3087477684020996, "rewards/margins": 1.5587693452835083, "rewards/rejected": -3.8675169944763184, "step": 3730 }, { "epoch": 0.98, "learning_rate": 6.84494196844715e-09, "logits/chosen": -1.2625192403793335, "logits/rejected": -0.4201609194278717, "logps/chosen": -579.9595336914062, "logps/rejected": -743.4432373046875, "loss": 0.4261, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0454494953155518, "rewards/margins": 1.329736351966858, "rewards/rejected": -3.37518572807312, "step": 3740 }, { "epoch": 0.98, "learning_rate": 5.259716884556121e-09, "logits/chosen": -1.0971883535385132, "logits/rejected": 0.13197267055511475, "logps/chosen": -497.7351989746094, "logps/rejected": -844.2490234375, "loss": 0.3651, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.8625205755233765, "rewards/margins": 2.320369243621826, "rewards/rejected": -4.182889461517334, "step": 3750 }, { "epoch": 0.98, "learning_rate": 3.882801896372967e-09, "logits/chosen": -1.2590482234954834, "logits/rejected": 0.06436818838119507, "logps/chosen": -604.3903198242188, "logps/rejected": -838.1032104492188, "loss": 0.3897, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.1124839782714844, "rewards/margins": 1.6700093746185303, "rewards/rejected": -3.7824935913085938, "step": 3760 }, { "epoch": 0.99, "learning_rate": 2.7143119759026614e-09, "logits/chosen": -0.8534577488899231, "logits/rejected": -0.31645748019218445, "logps/chosen": -536.9246215820312, "logps/rejected": -888.1343994140625, "loss": 0.2884, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.8014726638793945, "rewards/margins": 2.4648687839508057, "rewards/rejected": -4.266341209411621, "step": 3770 }, { "epoch": 0.99, "learning_rate": 1.754344691717591e-09, "logits/chosen": -1.0792902708053589, "logits/rejected": -0.5682160258293152, "logps/chosen": -508.158447265625, "logps/rejected": -773.3580932617188, "loss": 0.3699, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.8245254755020142, "rewards/margins": 1.8749605417251587, "rewards/rejected": -3.6994857788085938, "step": 3780 }, { "epoch": 0.99, "learning_rate": 1.0029802008096335e-09, "logits/chosen": -0.6716892719268799, "logits/rejected": -0.40372976660728455, "logps/chosen": -489.3829040527344, "logps/rejected": -723.220703125, "loss": 0.4356, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.7527339458465576, "rewards/margins": 1.6192394495010376, "rewards/rejected": -3.371973752975464, "step": 3790 }, { "epoch": 0.99, "learning_rate": 4.602812418974534e-10, "logits/chosen": -0.8607047200202942, "logits/rejected": -0.9630060195922852, "logps/chosen": -502.1089782714844, "logps/rejected": -731.4326171875, "loss": 0.4389, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.8323230743408203, "rewards/margins": 1.3677552938461304, "rewards/rejected": -3.200078248977661, "step": 3800 }, { "epoch": 1.0, "learning_rate": 1.2629313018819312e-10, "logits/chosen": -0.9664648175239563, "logits/rejected": 0.03846656158566475, "logps/chosen": -550.194091796875, "logps/rejected": -774.918212890625, "loss": 0.385, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.9849185943603516, "rewards/margins": 1.5859264135360718, "rewards/rejected": -3.5708446502685547, "step": 3810 }, { "epoch": 1.0, "learning_rate": 1.0437535929996855e-12, "logits/chosen": -1.1103519201278687, "logits/rejected": -0.32411596179008484, "logps/chosen": -598.4796752929688, "logps/rejected": -901.5104370117188, "loss": 0.4032, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -2.1410813331604004, "rewards/margins": 1.9627602100372314, "rewards/rejected": -4.103841304779053, "step": 3820 }, { "epoch": 1.0, "step": 3821, "total_flos": 0.0, "train_loss": 0.43769783746998087, "train_runtime": 91189.3347, "train_samples_per_second": 0.67, "train_steps_per_second": 0.042 } ], "logging_steps": 10, "max_steps": 3821, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 400, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }