{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 400, "global_step": 12465, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.009623095429029e-10, "logits/chosen": -3.029554605484009, "logits/rejected": -2.958740711212158, "logps/chosen": -239.6302947998047, "logps/rejected": -134.69642639160156, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 4.0096230954290295e-09, "logits/chosen": -2.757606029510498, "logits/rejected": -2.850358724594116, "logps/chosen": -248.6219024658203, "logps/rejected": -237.16183471679688, "loss": 0.6953, "rewards/accuracies": 0.5, "rewards/chosen": -0.009159507229924202, "rewards/margins": 0.0038133251946419477, "rewards/rejected": -0.012972831726074219, "step": 10 }, { "epoch": 0.0, "learning_rate": 8.019246190858059e-09, "logits/chosen": -2.8561160564422607, "logits/rejected": -2.731553792953491, "logps/chosen": -255.89724731445312, "logps/rejected": -124.84513854980469, "loss": 0.6937, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.001197890960611403, "rewards/margins": 0.011117557995021343, "rewards/rejected": -0.00991966761648655, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.2028869286287089e-08, "logits/chosen": -2.9133946895599365, "logits/rejected": -2.9325616359710693, "logps/chosen": -334.1841735839844, "logps/rejected": -296.2657775878906, "loss": 0.6968, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.004535389598459005, "rewards/margins": 0.010829145088791847, "rewards/rejected": -0.006293755955994129, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.6038492381716118e-08, "logits/chosen": -2.9462289810180664, "logits/rejected": -2.871635913848877, "logps/chosen": -257.1698303222656, "logps/rejected": -245.908203125, "loss": 0.6905, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0012525601778179407, "rewards/margins": 0.00830057729035616, "rewards/rejected": -0.009553136304020882, "step": 40 }, { "epoch": 0.01, "learning_rate": 2.0048115477145146e-08, "logits/chosen": -2.7116570472717285, "logits/rejected": -2.7510933876037598, "logps/chosen": -302.39923095703125, "logps/rejected": -288.64300537109375, "loss": 0.6959, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.009699945338070393, "rewards/margins": 0.017064867541193962, "rewards/rejected": -0.007364921271800995, "step": 50 }, { "epoch": 0.01, "learning_rate": 2.4057738572574177e-08, "logits/chosen": -2.752192974090576, "logits/rejected": -2.6760923862457275, "logps/chosen": -241.3775177001953, "logps/rejected": -285.0107421875, "loss": 0.6864, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.005488119088113308, "rewards/margins": 0.01543651707470417, "rewards/rejected": -0.009948397055268288, "step": 60 }, { "epoch": 0.02, "learning_rate": 2.8067361668003205e-08, "logits/chosen": -2.8234877586364746, "logits/rejected": -2.7496979236602783, "logps/chosen": -296.32574462890625, "logps/rejected": -217.22366333007812, "loss": 0.6948, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0016764644533395767, "rewards/margins": 0.0072411722503602505, "rewards/rejected": -0.008917637169361115, "step": 70 }, { "epoch": 0.02, "learning_rate": 3.2076984763432236e-08, "logits/chosen": -2.7752678394317627, "logits/rejected": -2.7286124229431152, "logps/chosen": -152.26710510253906, "logps/rejected": -170.56275939941406, "loss": 0.6873, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.008753329515457153, "rewards/margins": 0.0059437938034534454, "rewards/rejected": -0.014697122387588024, "step": 80 }, { "epoch": 0.02, "learning_rate": 3.608660785886127e-08, "logits/chosen": -2.857790231704712, "logits/rejected": -2.786897897720337, "logps/chosen": -207.5873565673828, "logps/rejected": -227.80184936523438, "loss": 0.6882, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.014288626611232758, "rewards/margins": 0.009933690540492535, "rewards/rejected": -0.024222319945693016, "step": 90 }, { "epoch": 0.02, "learning_rate": 4.009623095429029e-08, "logits/chosen": -2.7477333545684814, "logits/rejected": -2.76132869720459, "logps/chosen": -289.3299865722656, "logps/rejected": -196.60256958007812, "loss": 0.6831, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.009175291284918785, "rewards/margins": 0.05031620338559151, "rewards/rejected": -0.05949149280786514, "step": 100 }, { "epoch": 0.03, "learning_rate": 4.410585404971932e-08, "logits/chosen": -2.8593764305114746, "logits/rejected": -2.8490402698516846, "logps/chosen": -259.27545166015625, "logps/rejected": -272.4792175292969, "loss": 0.6865, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.010402527637779713, "rewards/margins": 0.031852759420871735, "rewards/rejected": -0.04225528985261917, "step": 110 }, { "epoch": 0.03, "learning_rate": 4.8115477145148354e-08, "logits/chosen": -2.828747510910034, "logits/rejected": -2.7842514514923096, "logps/chosen": -273.6574401855469, "logps/rejected": -270.96533203125, "loss": 0.6821, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.010771388188004494, "rewards/margins": 0.05202381685376167, "rewards/rejected": -0.06279521435499191, "step": 120 }, { "epoch": 0.03, "learning_rate": 5.2125100240577385e-08, "logits/chosen": -2.915553569793701, "logits/rejected": -2.8389952182769775, "logps/chosen": -262.3766784667969, "logps/rejected": -256.4173583984375, "loss": 0.6822, "rewards/accuracies": 0.75, "rewards/chosen": -0.024069635197520256, "rewards/margins": 0.03253183513879776, "rewards/rejected": -0.056601472198963165, "step": 130 }, { "epoch": 0.03, "learning_rate": 5.613472333600641e-08, "logits/chosen": -2.9118704795837402, "logits/rejected": -2.927285671234131, "logps/chosen": -158.02059936523438, "logps/rejected": -209.7635955810547, "loss": 0.6717, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04194178432226181, "rewards/margins": 0.027061814442276955, "rewards/rejected": -0.06900360435247421, "step": 140 }, { "epoch": 0.04, "learning_rate": 6.014434643143545e-08, "logits/chosen": -2.893004894256592, "logits/rejected": -2.880948543548584, "logps/chosen": -202.68736267089844, "logps/rejected": -205.7903594970703, "loss": 0.6815, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.023197593167424202, "rewards/margins": 0.04789852350950241, "rewards/rejected": -0.07109610736370087, "step": 150 }, { "epoch": 0.04, "learning_rate": 6.415396952686447e-08, "logits/chosen": -2.9180562496185303, "logits/rejected": -2.7946887016296387, "logps/chosen": -332.65960693359375, "logps/rejected": -238.1708526611328, "loss": 0.6603, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.01652655564248562, "rewards/margins": 0.008368945680558681, "rewards/rejected": -0.024895502254366875, "step": 160 }, { "epoch": 0.04, "learning_rate": 6.81635926222935e-08, "logits/chosen": -2.9096858501434326, "logits/rejected": -2.9310355186462402, "logps/chosen": -251.33975219726562, "logps/rejected": -240.75460815429688, "loss": 0.6601, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.013797881081700325, "rewards/margins": 0.13765253126621246, "rewards/rejected": -0.15145042538642883, "step": 170 }, { "epoch": 0.04, "learning_rate": 7.217321571772253e-08, "logits/chosen": -2.988015651702881, "logits/rejected": -2.9767017364501953, "logps/chosen": -212.15673828125, "logps/rejected": -159.58204650878906, "loss": 0.6581, "rewards/accuracies": 0.5, "rewards/chosen": -0.10063859075307846, "rewards/margins": 0.07325638830661774, "rewards/rejected": -0.1738949865102768, "step": 180 }, { "epoch": 0.05, "learning_rate": 7.618283881315156e-08, "logits/chosen": -2.933257818222046, "logits/rejected": -2.860905170440674, "logps/chosen": -324.5125427246094, "logps/rejected": -359.4278564453125, "loss": 0.6641, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.020703891292214394, "rewards/margins": 0.08283446729183197, "rewards/rejected": -0.10353837162256241, "step": 190 }, { "epoch": 0.05, "learning_rate": 8.019246190858058e-08, "logits/chosen": -2.900707721710205, "logits/rejected": -2.8246498107910156, "logps/chosen": -203.93099975585938, "logps/rejected": -252.4442138671875, "loss": 0.6745, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.07562927901744843, "rewards/margins": 0.02675458788871765, "rewards/rejected": -0.10238387435674667, "step": 200 }, { "epoch": 0.05, "learning_rate": 8.420208500400962e-08, "logits/chosen": -2.7394564151763916, "logits/rejected": -2.7282943725585938, "logps/chosen": -197.1373748779297, "logps/rejected": -226.32382202148438, "loss": 0.6562, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07417772710323334, "rewards/margins": 0.05352227762341499, "rewards/rejected": -0.12770001590251923, "step": 210 }, { "epoch": 0.05, "learning_rate": 8.821170809943865e-08, "logits/chosen": -2.844759941101074, "logits/rejected": -2.826810359954834, "logps/chosen": -191.80062866210938, "logps/rejected": -221.77932739257812, "loss": 0.6344, "rewards/accuracies": 0.5, "rewards/chosen": -0.08790848404169083, "rewards/margins": 0.021957406774163246, "rewards/rejected": -0.10986590385437012, "step": 220 }, { "epoch": 0.06, "learning_rate": 9.222133119486767e-08, "logits/chosen": -2.957456111907959, "logits/rejected": -2.835681438446045, "logps/chosen": -322.5172119140625, "logps/rejected": -253.207763671875, "loss": 0.6182, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.04191254824399948, "rewards/margins": 0.16338881850242615, "rewards/rejected": -0.12147627025842667, "step": 230 }, { "epoch": 0.06, "learning_rate": 9.623095429029671e-08, "logits/chosen": -2.9493839740753174, "logits/rejected": -2.945842981338501, "logps/chosen": -256.1016540527344, "logps/rejected": -189.64895629882812, "loss": 0.6458, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.042133476585149765, "rewards/margins": 0.1592259705066681, "rewards/rejected": -0.20135946571826935, "step": 240 }, { "epoch": 0.06, "learning_rate": 1.0024057738572573e-07, "logits/chosen": -2.8034961223602295, "logits/rejected": -2.8224565982818604, "logps/chosen": -252.3696746826172, "logps/rejected": -198.51544189453125, "loss": 0.6295, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.037950549274683, "rewards/margins": 0.15652048587799072, "rewards/rejected": -0.11856994777917862, "step": 250 }, { "epoch": 0.06, "learning_rate": 1.0425020048115477e-07, "logits/chosen": -2.8333163261413574, "logits/rejected": -2.799535036087036, "logps/chosen": -198.86106872558594, "logps/rejected": -193.21127319335938, "loss": 0.6362, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.02170318178832531, "rewards/margins": 0.11439894139766693, "rewards/rejected": -0.1361021101474762, "step": 260 }, { "epoch": 0.06, "learning_rate": 1.082598235765838e-07, "logits/chosen": -2.939605712890625, "logits/rejected": -2.8323261737823486, "logps/chosen": -253.91775512695312, "logps/rejected": -242.62594604492188, "loss": 0.6529, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12199263274669647, "rewards/margins": 0.14367853105068207, "rewards/rejected": -0.2656711935997009, "step": 270 }, { "epoch": 0.07, "learning_rate": 1.1226944667201282e-07, "logits/chosen": -2.705425977706909, "logits/rejected": -2.727578639984131, "logps/chosen": -156.19100952148438, "logps/rejected": -240.2871856689453, "loss": 0.6291, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.002952949609607458, "rewards/margins": 0.07988782227039337, "rewards/rejected": -0.08284077048301697, "step": 280 }, { "epoch": 0.07, "learning_rate": 1.1627906976744186e-07, "logits/chosen": -2.821107864379883, "logits/rejected": -2.7760562896728516, "logps/chosen": -275.28216552734375, "logps/rejected": -237.4905548095703, "loss": 0.6239, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10767307132482529, "rewards/margins": 0.12761202454566956, "rewards/rejected": -0.23528508841991425, "step": 290 }, { "epoch": 0.07, "learning_rate": 1.202886928628709e-07, "logits/chosen": -2.8041632175445557, "logits/rejected": -2.76464581489563, "logps/chosen": -301.4250183105469, "logps/rejected": -406.40057373046875, "loss": 0.6509, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0919116735458374, "rewards/margins": 0.24763791263103485, "rewards/rejected": -0.15572626888751984, "step": 300 }, { "epoch": 0.07, "learning_rate": 1.242983159582999e-07, "logits/chosen": -2.811683177947998, "logits/rejected": -2.7772467136383057, "logps/chosen": -222.76901245117188, "logps/rejected": -203.6343536376953, "loss": 0.6285, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10157088935375214, "rewards/margins": 0.0654078871011734, "rewards/rejected": -0.16697879135608673, "step": 310 }, { "epoch": 0.08, "learning_rate": 1.2830793905372894e-07, "logits/chosen": -2.947110414505005, "logits/rejected": -2.8601386547088623, "logps/chosen": -289.3692321777344, "logps/rejected": -234.7985382080078, "loss": 0.6254, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.15501949191093445, "rewards/margins": 0.17323021590709686, "rewards/rejected": -0.018210697919130325, "step": 320 }, { "epoch": 0.08, "learning_rate": 1.3231756214915798e-07, "logits/chosen": -2.9331088066101074, "logits/rejected": -2.865115165710449, "logps/chosen": -369.88671875, "logps/rejected": -301.6773986816406, "loss": 0.5713, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.22617539763450623, "rewards/margins": 0.504052996635437, "rewards/rejected": -0.2778776288032532, "step": 330 }, { "epoch": 0.08, "learning_rate": 1.36327185244587e-07, "logits/chosen": -2.645563840866089, "logits/rejected": -2.539353609085083, "logps/chosen": -209.6802978515625, "logps/rejected": -169.13308715820312, "loss": 0.5846, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.032310713082551956, "rewards/margins": 0.2370806187391281, "rewards/rejected": -0.26939135789871216, "step": 340 }, { "epoch": 0.08, "learning_rate": 1.4033680834001603e-07, "logits/chosen": -2.581049919128418, "logits/rejected": -2.5333192348480225, "logps/chosen": -225.43392944335938, "logps/rejected": -159.21926879882812, "loss": 0.5605, "rewards/accuracies": 0.75, "rewards/chosen": 0.19601905345916748, "rewards/margins": 0.25073686242103577, "rewards/rejected": -0.05471784994006157, "step": 350 }, { "epoch": 0.09, "learning_rate": 1.4434643143544507e-07, "logits/chosen": -2.9410252571105957, "logits/rejected": -2.8339309692382812, "logps/chosen": -280.1285705566406, "logps/rejected": -271.2792663574219, "loss": 0.5662, "rewards/accuracies": 0.75, "rewards/chosen": 0.17516747117042542, "rewards/margins": 0.35865694284439087, "rewards/rejected": -0.18348945677280426, "step": 360 }, { "epoch": 0.09, "learning_rate": 1.483560545308741e-07, "logits/chosen": -2.8755431175231934, "logits/rejected": -2.8623225688934326, "logps/chosen": -187.56100463867188, "logps/rejected": -187.7587432861328, "loss": 0.5817, "rewards/accuracies": 0.75, "rewards/chosen": 0.26764410734176636, "rewards/margins": 0.26760828495025635, "rewards/rejected": 3.5798548196908087e-05, "step": 370 }, { "epoch": 0.09, "learning_rate": 1.5236567762630312e-07, "logits/chosen": -2.897462844848633, "logits/rejected": -2.8069605827331543, "logps/chosen": -255.22653198242188, "logps/rejected": -255.3626708984375, "loss": 0.5824, "rewards/accuracies": 0.75, "rewards/chosen": 0.4705290198326111, "rewards/margins": 0.3691382110118866, "rewards/rejected": 0.10139081627130508, "step": 380 }, { "epoch": 0.09, "learning_rate": 1.5637530072173216e-07, "logits/chosen": -2.919511318206787, "logits/rejected": -2.8042795658111572, "logps/chosen": -325.6533508300781, "logps/rejected": -187.78512573242188, "loss": 0.5513, "rewards/accuracies": 0.75, "rewards/chosen": 0.4940560460090637, "rewards/margins": 0.7009488344192505, "rewards/rejected": -0.20689284801483154, "step": 390 }, { "epoch": 0.1, "learning_rate": 1.6038492381716117e-07, "logits/chosen": -2.9202237129211426, "logits/rejected": -2.9232850074768066, "logps/chosen": -247.07302856445312, "logps/rejected": -275.75836181640625, "loss": 0.5994, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.41024351119995117, "rewards/margins": 0.20150327682495117, "rewards/rejected": 0.2087402641773224, "step": 400 }, { "epoch": 0.1, "eval_logits/chosen": -2.648510456085205, "eval_logits/rejected": -2.6271843910217285, "eval_logps/chosen": -198.87437438964844, "eval_logps/rejected": -189.90797424316406, "eval_loss": 0.5895335674285889, "eval_rewards/accuracies": 0.5950000286102295, "eval_rewards/chosen": 0.3053191304206848, "eval_rewards/margins": 0.34303680062294006, "eval_rewards/rejected": -0.037717677652835846, "eval_runtime": 132.8101, "eval_samples_per_second": 23.763, "eval_steps_per_second": 0.376, "step": 400 }, { "epoch": 0.1, "learning_rate": 1.6439454691259023e-07, "logits/chosen": -2.824357271194458, "logits/rejected": -2.741654872894287, "logps/chosen": -304.6470031738281, "logps/rejected": -313.9639587402344, "loss": 0.564, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.6792846918106079, "rewards/margins": 0.5655020475387573, "rewards/rejected": 0.11378266662359238, "step": 410 }, { "epoch": 0.1, "learning_rate": 1.6840417000801924e-07, "logits/chosen": -2.782989978790283, "logits/rejected": -2.753141403198242, "logps/chosen": -279.14300537109375, "logps/rejected": -236.7024383544922, "loss": 0.6065, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.519902765750885, "rewards/margins": 0.45831745862960815, "rewards/rejected": 0.06158534437417984, "step": 420 }, { "epoch": 0.1, "learning_rate": 1.7241379310344828e-07, "logits/chosen": -2.579185724258423, "logits/rejected": -2.7632734775543213, "logps/chosen": -236.32113647460938, "logps/rejected": -315.44256591796875, "loss": 0.5608, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.2255411595106125, "rewards/margins": 0.11108832061290741, "rewards/rejected": 0.1144527941942215, "step": 430 }, { "epoch": 0.11, "learning_rate": 1.764234161988773e-07, "logits/chosen": -2.7754387855529785, "logits/rejected": -2.6534359455108643, "logps/chosen": -233.37680053710938, "logps/rejected": -227.4702911376953, "loss": 0.5559, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.4115857481956482, "rewards/margins": 1.0299506187438965, "rewards/rejected": -0.6183647513389587, "step": 440 }, { "epoch": 0.11, "learning_rate": 1.8043303929430633e-07, "logits/chosen": -2.7775425910949707, "logits/rejected": -2.770744800567627, "logps/chosen": -174.74822998046875, "logps/rejected": -169.72979736328125, "loss": 0.5884, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.07133658230304718, "rewards/margins": 0.10153523832559586, "rewards/rejected": -0.17287181317806244, "step": 450 }, { "epoch": 0.11, "learning_rate": 1.8444266238973534e-07, "logits/chosen": -2.8076884746551514, "logits/rejected": -2.8743953704833984, "logps/chosen": -254.01199340820312, "logps/rejected": -266.43310546875, "loss": 0.6159, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.005350641906261444, "rewards/margins": 0.5136643648147583, "rewards/rejected": -0.5083136558532715, "step": 460 }, { "epoch": 0.11, "learning_rate": 1.884522854851644e-07, "logits/chosen": -2.820117235183716, "logits/rejected": -2.7825677394866943, "logps/chosen": -243.0380401611328, "logps/rejected": -180.35055541992188, "loss": 0.6442, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.10030399262905121, "rewards/margins": 0.535255491733551, "rewards/rejected": -0.43495145440101624, "step": 470 }, { "epoch": 0.12, "learning_rate": 1.9246190858059342e-07, "logits/chosen": -2.9516303539276123, "logits/rejected": -2.877495527267456, "logps/chosen": -273.7452392578125, "logps/rejected": -231.18374633789062, "loss": 0.5541, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.4644753932952881, "rewards/margins": 0.6943734884262085, "rewards/rejected": -0.229898139834404, "step": 480 }, { "epoch": 0.12, "learning_rate": 1.9647153167602245e-07, "logits/chosen": -2.7269504070281982, "logits/rejected": -2.774722099304199, "logps/chosen": -157.05789184570312, "logps/rejected": -270.0483093261719, "loss": 0.5355, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.2008717954158783, "rewards/margins": 0.375074177980423, "rewards/rejected": -0.1742023378610611, "step": 490 }, { "epoch": 0.12, "learning_rate": 2.0048115477145147e-07, "logits/chosen": -2.659719944000244, "logits/rejected": -2.689526319503784, "logps/chosen": -288.8984375, "logps/rejected": -269.92340087890625, "loss": 0.5479, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.5158240795135498, "rewards/margins": 0.6448057889938354, "rewards/rejected": -0.12898170948028564, "step": 500 }, { "epoch": 0.12, "learning_rate": 2.044907778668805e-07, "logits/chosen": -2.827486515045166, "logits/rejected": -2.677001714706421, "logps/chosen": -295.91949462890625, "logps/rejected": -216.48403930664062, "loss": 0.6283, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.48119044303894043, "rewards/margins": 0.4074464440345764, "rewards/rejected": 0.07374398410320282, "step": 510 }, { "epoch": 0.13, "learning_rate": 2.0850040096230954e-07, "logits/chosen": -2.9389195442199707, "logits/rejected": -2.760077714920044, "logps/chosen": -323.9413146972656, "logps/rejected": -243.3992156982422, "loss": 0.5955, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.2887626886367798, "rewards/margins": 0.7296485304832458, "rewards/rejected": -0.44088587164878845, "step": 520 }, { "epoch": 0.13, "learning_rate": 2.1251002405773858e-07, "logits/chosen": -2.861633777618408, "logits/rejected": -2.8447012901306152, "logps/chosen": -327.47515869140625, "logps/rejected": -267.309814453125, "loss": 0.5627, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.7268004417419434, "rewards/margins": 0.37961870431900024, "rewards/rejected": 0.34718185663223267, "step": 530 }, { "epoch": 0.13, "learning_rate": 2.165196471531676e-07, "logits/chosen": -2.561352252960205, "logits/rejected": -2.57576584815979, "logps/chosen": -208.40731811523438, "logps/rejected": -216.1819305419922, "loss": 0.624, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.3119715750217438, "rewards/margins": 0.571592390537262, "rewards/rejected": -0.2596207857131958, "step": 540 }, { "epoch": 0.13, "learning_rate": 2.2052927024859663e-07, "logits/chosen": -2.7754769325256348, "logits/rejected": -2.771836757659912, "logps/chosen": -298.20635986328125, "logps/rejected": -238.71176147460938, "loss": 0.5883, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.4887056350708008, "rewards/margins": 0.631751537322998, "rewards/rejected": -0.14304590225219727, "step": 550 }, { "epoch": 0.13, "learning_rate": 2.2453889334402564e-07, "logits/chosen": -2.7937541007995605, "logits/rejected": -2.68151593208313, "logps/chosen": -196.40573120117188, "logps/rejected": -118.2184066772461, "loss": 0.504, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.5687293410301208, "rewards/margins": 0.7040340304374695, "rewards/rejected": -0.13530471920967102, "step": 560 }, { "epoch": 0.14, "learning_rate": 2.285485164394547e-07, "logits/chosen": -2.697080612182617, "logits/rejected": -2.7340171337127686, "logps/chosen": -198.97213745117188, "logps/rejected": -264.6471252441406, "loss": 0.5554, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.41746944189071655, "rewards/margins": 0.843182384967804, "rewards/rejected": -0.425712913274765, "step": 570 }, { "epoch": 0.14, "learning_rate": 2.3255813953488372e-07, "logits/chosen": -2.761768341064453, "logits/rejected": -2.817594051361084, "logps/chosen": -209.8280487060547, "logps/rejected": -212.88150024414062, "loss": 0.5672, "rewards/accuracies": 0.75, "rewards/chosen": 0.36323896050453186, "rewards/margins": 0.4819954037666321, "rewards/rejected": -0.1187564879655838, "step": 580 }, { "epoch": 0.14, "learning_rate": 2.3656776263031275e-07, "logits/chosen": -2.87705397605896, "logits/rejected": -2.7968459129333496, "logps/chosen": -276.10699462890625, "logps/rejected": -210.0780792236328, "loss": 0.5556, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.1134200468659401, "rewards/margins": 0.6706252694129944, "rewards/rejected": -0.5572052001953125, "step": 590 }, { "epoch": 0.14, "learning_rate": 2.405773857257418e-07, "logits/chosen": -2.921949863433838, "logits/rejected": -2.852156639099121, "logps/chosen": -299.18609619140625, "logps/rejected": -249.9677276611328, "loss": 0.5832, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.048682499676942825, "rewards/margins": 0.2762266993522644, "rewards/rejected": -0.3249092102050781, "step": 600 }, { "epoch": 0.15, "learning_rate": 2.445870088211708e-07, "logits/chosen": -2.8194832801818848, "logits/rejected": -2.804983615875244, "logps/chosen": -288.4292907714844, "logps/rejected": -270.6551208496094, "loss": 0.597, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.05671919509768486, "rewards/margins": 0.0665658563375473, "rewards/rejected": -0.00984666682779789, "step": 610 }, { "epoch": 0.15, "learning_rate": 2.485966319165998e-07, "logits/chosen": -2.662090539932251, "logits/rejected": -2.7223060131073, "logps/chosen": -160.1973419189453, "logps/rejected": -188.27352905273438, "loss": 0.685, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.15639618039131165, "rewards/margins": 0.5227680802345276, "rewards/rejected": -0.36637189984321594, "step": 620 }, { "epoch": 0.15, "learning_rate": 2.526062550120289e-07, "logits/chosen": -2.924830913543701, "logits/rejected": -2.8918211460113525, "logps/chosen": -223.8957977294922, "logps/rejected": -203.39224243164062, "loss": 0.6228, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24558000266551971, "rewards/margins": 0.5432695150375366, "rewards/rejected": -0.7888495326042175, "step": 630 }, { "epoch": 0.15, "learning_rate": 2.566158781074579e-07, "logits/chosen": -2.8393218517303467, "logits/rejected": -2.8019471168518066, "logps/chosen": -247.6842803955078, "logps/rejected": -235.5037078857422, "loss": 0.5741, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2942962050437927, "rewards/margins": 0.29929882287979126, "rewards/rejected": -0.593595027923584, "step": 640 }, { "epoch": 0.16, "learning_rate": 2.606255012028869e-07, "logits/chosen": -2.76501202583313, "logits/rejected": -2.7004733085632324, "logps/chosen": -301.2521057128906, "logps/rejected": -254.12240600585938, "loss": 0.5037, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.36596235632896423, "rewards/margins": 0.8270140886306763, "rewards/rejected": -1.1929763555526733, "step": 650 }, { "epoch": 0.16, "learning_rate": 2.6463512429831596e-07, "logits/chosen": -2.7481508255004883, "logits/rejected": -2.6847853660583496, "logps/chosen": -249.2167205810547, "logps/rejected": -258.1322326660156, "loss": 0.5659, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2779560089111328, "rewards/margins": 0.5480610728263855, "rewards/rejected": -0.8260170817375183, "step": 660 }, { "epoch": 0.16, "learning_rate": 2.68644747393745e-07, "logits/chosen": -2.7402031421661377, "logits/rejected": -2.674595355987549, "logps/chosen": -216.8399200439453, "logps/rejected": -173.94277954101562, "loss": 0.5758, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18031704425811768, "rewards/margins": 0.51617032289505, "rewards/rejected": -0.6964873671531677, "step": 670 }, { "epoch": 0.16, "learning_rate": 2.72654370489174e-07, "logits/chosen": -2.7919907569885254, "logits/rejected": -2.7736024856567383, "logps/chosen": -214.3600311279297, "logps/rejected": -296.0640563964844, "loss": 0.5332, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3602636456489563, "rewards/margins": 0.7699328660964966, "rewards/rejected": -1.1301965713500977, "step": 680 }, { "epoch": 0.17, "learning_rate": 2.76663993584603e-07, "logits/chosen": -2.8806838989257812, "logits/rejected": -2.8978848457336426, "logps/chosen": -304.77935791015625, "logps/rejected": -299.870849609375, "loss": 0.6241, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06528373807668686, "rewards/margins": 0.42759591341018677, "rewards/rejected": -0.49287962913513184, "step": 690 }, { "epoch": 0.17, "learning_rate": 2.8067361668003206e-07, "logits/chosen": -2.5968194007873535, "logits/rejected": -2.460662603378296, "logps/chosen": -306.84686279296875, "logps/rejected": -230.10562133789062, "loss": 0.5487, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11472682654857635, "rewards/margins": 0.6224948167800903, "rewards/rejected": -0.7372217178344727, "step": 700 }, { "epoch": 0.17, "learning_rate": 2.8468323977546113e-07, "logits/chosen": -2.8128252029418945, "logits/rejected": -2.7990946769714355, "logps/chosen": -328.0445251464844, "logps/rejected": -307.45086669921875, "loss": 0.5435, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.06992456316947937, "rewards/margins": 0.7968131303787231, "rewards/rejected": -0.7268885374069214, "step": 710 }, { "epoch": 0.17, "learning_rate": 2.8869286287089014e-07, "logits/chosen": -2.818443775177002, "logits/rejected": -2.7941231727600098, "logps/chosen": -310.6451721191406, "logps/rejected": -250.27383422851562, "loss": 0.5224, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2724413573741913, "rewards/margins": 0.7151150107383728, "rewards/rejected": -0.9875563383102417, "step": 720 }, { "epoch": 0.18, "learning_rate": 2.9270248596631915e-07, "logits/chosen": -2.6747488975524902, "logits/rejected": -2.6234326362609863, "logps/chosen": -254.17636108398438, "logps/rejected": -259.8215026855469, "loss": 0.6097, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1818128079175949, "rewards/margins": 1.5986982583999634, "rewards/rejected": -1.7805109024047852, "step": 730 }, { "epoch": 0.18, "learning_rate": 2.967121090617482e-07, "logits/chosen": -2.5844151973724365, "logits/rejected": -2.509438991546631, "logps/chosen": -196.0883026123047, "logps/rejected": -192.82748413085938, "loss": 0.551, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4821125864982605, "rewards/margins": 0.5221725702285767, "rewards/rejected": -1.0042850971221924, "step": 740 }, { "epoch": 0.18, "learning_rate": 3.007217321571772e-07, "logits/chosen": -2.9302337169647217, "logits/rejected": -2.8687689304351807, "logps/chosen": -376.0974426269531, "logps/rejected": -341.4430236816406, "loss": 0.5568, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5697919130325317, "rewards/margins": 0.8332484364509583, "rewards/rejected": -1.4030402898788452, "step": 750 }, { "epoch": 0.18, "learning_rate": 3.0473135525260624e-07, "logits/chosen": -2.728005886077881, "logits/rejected": -2.7057783603668213, "logps/chosen": -255.732421875, "logps/rejected": -273.4658203125, "loss": 0.5402, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.38343286514282227, "rewards/margins": 0.8520215153694153, "rewards/rejected": -1.2354543209075928, "step": 760 }, { "epoch": 0.19, "learning_rate": 3.0874097834803525e-07, "logits/chosen": -2.6662607192993164, "logits/rejected": -2.6617627143859863, "logps/chosen": -367.4737854003906, "logps/rejected": -286.6307373046875, "loss": 0.5507, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.372802495956421, "rewards/margins": 0.3644545376300812, "rewards/rejected": -1.7372572422027588, "step": 770 }, { "epoch": 0.19, "learning_rate": 3.127506014434643e-07, "logits/chosen": -2.499572277069092, "logits/rejected": -2.6057636737823486, "logps/chosen": -290.12115478515625, "logps/rejected": -296.11944580078125, "loss": 0.5236, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3911897540092468, "rewards/margins": 1.4719053506851196, "rewards/rejected": -1.8630950450897217, "step": 780 }, { "epoch": 0.19, "learning_rate": 3.167602245388933e-07, "logits/chosen": -2.8040318489074707, "logits/rejected": -2.7535839080810547, "logps/chosen": -255.7650909423828, "logps/rejected": -202.61978149414062, "loss": 0.5597, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1364818662405014, "rewards/margins": 0.7933239340782166, "rewards/rejected": -0.9298057556152344, "step": 790 }, { "epoch": 0.19, "learning_rate": 3.2076984763432233e-07, "logits/chosen": -2.8974623680114746, "logits/rejected": -2.84101939201355, "logps/chosen": -307.80230712890625, "logps/rejected": -288.81854248046875, "loss": 0.5024, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.2426052987575531, "rewards/margins": 0.36778146028518677, "rewards/rejected": -0.12517614662647247, "step": 800 }, { "epoch": 0.19, "eval_logits/chosen": -2.5328729152679443, "eval_logits/rejected": -2.5092859268188477, "eval_logps/chosen": -203.2058563232422, "eval_logps/rejected": -199.95619201660156, "eval_loss": 0.5111984610557556, "eval_rewards/accuracies": 0.6675000190734863, "eval_rewards/chosen": -0.12783148884773254, "eval_rewards/margins": 0.9147088527679443, "eval_rewards/rejected": -1.042540192604065, "eval_runtime": 133.661, "eval_samples_per_second": 23.612, "eval_steps_per_second": 0.374, "step": 800 }, { "epoch": 0.19, "learning_rate": 3.2477947072975135e-07, "logits/chosen": -2.769944190979004, "logits/rejected": -2.7940926551818848, "logps/chosen": -251.9970245361328, "logps/rejected": -233.0319061279297, "loss": 0.602, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.244999960064888, "rewards/margins": 0.5882779359817505, "rewards/rejected": -0.8332778215408325, "step": 810 }, { "epoch": 0.2, "learning_rate": 3.2878909382518046e-07, "logits/chosen": -2.7508976459503174, "logits/rejected": -2.6556122303009033, "logps/chosen": -268.6809997558594, "logps/rejected": -225.40792846679688, "loss": 0.5085, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.32555705308914185, "rewards/margins": 0.5282832384109497, "rewards/rejected": -0.8538403511047363, "step": 820 }, { "epoch": 0.2, "learning_rate": 3.327987169206095e-07, "logits/chosen": -2.651878833770752, "logits/rejected": -2.659212350845337, "logps/chosen": -193.90365600585938, "logps/rejected": -274.38616943359375, "loss": 0.5765, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.4068203866481781, "rewards/margins": 0.3031473159790039, "rewards/rejected": -0.7099677324295044, "step": 830 }, { "epoch": 0.2, "learning_rate": 3.368083400160385e-07, "logits/chosen": -2.663372039794922, "logits/rejected": -2.8095250129699707, "logps/chosen": -200.6801300048828, "logps/rejected": -225.0947265625, "loss": 0.5662, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.04706698656082153, "rewards/margins": 0.3949834406375885, "rewards/rejected": -0.34791645407676697, "step": 840 }, { "epoch": 0.2, "learning_rate": 3.408179631114675e-07, "logits/chosen": -2.638129711151123, "logits/rejected": -2.600186824798584, "logps/chosen": -226.84945678710938, "logps/rejected": -225.19082641601562, "loss": 0.5461, "rewards/accuracies": 0.75, "rewards/chosen": 0.02068863809108734, "rewards/margins": 0.9978263974189758, "rewards/rejected": -0.9771377444267273, "step": 850 }, { "epoch": 0.21, "learning_rate": 3.4482758620689656e-07, "logits/chosen": -2.5684962272644043, "logits/rejected": -2.7589926719665527, "logps/chosen": -220.42724609375, "logps/rejected": -334.71624755859375, "loss": 0.5906, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.264083594083786, "rewards/margins": 0.5368421673774719, "rewards/rejected": -0.8009258508682251, "step": 860 }, { "epoch": 0.21, "learning_rate": 3.4883720930232557e-07, "logits/chosen": -2.830177068710327, "logits/rejected": -2.667341709136963, "logps/chosen": -304.4564514160156, "logps/rejected": -249.4889678955078, "loss": 0.553, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17647650837898254, "rewards/margins": 1.0650815963745117, "rewards/rejected": -1.2415580749511719, "step": 870 }, { "epoch": 0.21, "learning_rate": 3.528468323977546e-07, "logits/chosen": -2.8149116039276123, "logits/rejected": -2.7310032844543457, "logps/chosen": -255.3647918701172, "logps/rejected": -217.87197875976562, "loss": 0.6495, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.30641308426856995, "rewards/margins": 0.5809476971626282, "rewards/rejected": -0.8873607516288757, "step": 880 }, { "epoch": 0.21, "learning_rate": 3.568564554931836e-07, "logits/chosen": -2.7607791423797607, "logits/rejected": -2.804769515991211, "logps/chosen": -281.76788330078125, "logps/rejected": -262.23126220703125, "loss": 0.5512, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5253441333770752, "rewards/margins": 0.4526292383670807, "rewards/rejected": -0.9779733419418335, "step": 890 }, { "epoch": 0.22, "learning_rate": 3.6086607858861266e-07, "logits/chosen": -2.9751179218292236, "logits/rejected": -2.9251646995544434, "logps/chosen": -310.1224365234375, "logps/rejected": -311.144287109375, "loss": 0.5392, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6419559717178345, "rewards/margins": 0.9658881425857544, "rewards/rejected": -1.6078441143035889, "step": 900 }, { "epoch": 0.22, "learning_rate": 3.6487570168404167e-07, "logits/chosen": -2.8095269203186035, "logits/rejected": -2.7687015533447266, "logps/chosen": -315.8305969238281, "logps/rejected": -280.93994140625, "loss": 0.5323, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05711637809872627, "rewards/margins": 0.9887657165527344, "rewards/rejected": -1.045882225036621, "step": 910 }, { "epoch": 0.22, "learning_rate": 3.688853247794707e-07, "logits/chosen": -2.6602730751037598, "logits/rejected": -2.648829936981201, "logps/chosen": -287.0523376464844, "logps/rejected": -225.801513671875, "loss": 0.7499, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.11555546522140503, "rewards/margins": 1.0678458213806152, "rewards/rejected": -1.183401107788086, "step": 920 }, { "epoch": 0.22, "learning_rate": 3.7289494787489975e-07, "logits/chosen": -2.725248336791992, "logits/rejected": -2.6758840084075928, "logps/chosen": -277.663330078125, "logps/rejected": -233.9459228515625, "loss": 0.5284, "rewards/accuracies": 0.75, "rewards/chosen": 0.5111667513847351, "rewards/margins": 1.2667210102081299, "rewards/rejected": -0.7555543780326843, "step": 930 }, { "epoch": 0.23, "learning_rate": 3.769045709703288e-07, "logits/chosen": -2.7036404609680176, "logits/rejected": -2.615960121154785, "logps/chosen": -248.12332153320312, "logps/rejected": -220.653564453125, "loss": 0.5286, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5193105936050415, "rewards/margins": 0.9040799140930176, "rewards/rejected": -1.423390507698059, "step": 940 }, { "epoch": 0.23, "learning_rate": 3.809141940657578e-07, "logits/chosen": -2.7405402660369873, "logits/rejected": -2.5490360260009766, "logps/chosen": -218.7405548095703, "logps/rejected": -209.5735321044922, "loss": 0.5431, "rewards/accuracies": 0.75, "rewards/chosen": -0.42782601714134216, "rewards/margins": 1.1354576349258423, "rewards/rejected": -1.5632835626602173, "step": 950 }, { "epoch": 0.23, "learning_rate": 3.8492381716118683e-07, "logits/chosen": -2.7462010383605957, "logits/rejected": -2.7566380500793457, "logps/chosen": -251.06198120117188, "logps/rejected": -270.6859130859375, "loss": 0.5315, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5047445297241211, "rewards/margins": 0.46457457542419434, "rewards/rejected": -0.9693191647529602, "step": 960 }, { "epoch": 0.23, "learning_rate": 3.8893344025661585e-07, "logits/chosen": -2.7401294708251953, "logits/rejected": -2.7194225788116455, "logps/chosen": -238.30801391601562, "logps/rejected": -263.53265380859375, "loss": 0.5761, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.07245366275310516, "rewards/margins": 0.8650237917900085, "rewards/rejected": -0.792570173740387, "step": 970 }, { "epoch": 0.24, "learning_rate": 3.929430633520449e-07, "logits/chosen": -2.621753454208374, "logits/rejected": -2.5244412422180176, "logps/chosen": -302.0888671875, "logps/rejected": -295.4062805175781, "loss": 0.5195, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.619012176990509, "rewards/margins": 1.0741729736328125, "rewards/rejected": -1.6931850910186768, "step": 980 }, { "epoch": 0.24, "learning_rate": 3.969526864474739e-07, "logits/chosen": -2.7257871627807617, "logits/rejected": -2.7017054557800293, "logps/chosen": -252.85275268554688, "logps/rejected": -236.3311309814453, "loss": 0.5415, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2137785255908966, "rewards/margins": 0.5660091042518616, "rewards/rejected": -0.7797876000404358, "step": 990 }, { "epoch": 0.24, "learning_rate": 4.0096230954290293e-07, "logits/chosen": -2.5677154064178467, "logits/rejected": -2.4841880798339844, "logps/chosen": -311.47845458984375, "logps/rejected": -255.2757110595703, "loss": 0.4928, "rewards/accuracies": 0.75, "rewards/chosen": -0.5334405899047852, "rewards/margins": 1.1248290538787842, "rewards/rejected": -1.6582696437835693, "step": 1000 }, { "epoch": 0.24, "learning_rate": 4.0497193263833194e-07, "logits/chosen": -2.8594164848327637, "logits/rejected": -2.7990007400512695, "logps/chosen": -248.8976593017578, "logps/rejected": -212.68789672851562, "loss": 0.5403, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.7001456022262573, "rewards/margins": 0.3277003765106201, "rewards/rejected": -1.027845859527588, "step": 1010 }, { "epoch": 0.25, "learning_rate": 4.08981555733761e-07, "logits/chosen": -2.7849814891815186, "logits/rejected": -2.821704149246216, "logps/chosen": -351.97552490234375, "logps/rejected": -288.44549560546875, "loss": 0.6945, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03144335746765137, "rewards/margins": 1.1996476650238037, "rewards/rejected": -1.231091022491455, "step": 1020 }, { "epoch": 0.25, "learning_rate": 4.1299117882919007e-07, "logits/chosen": -2.619790554046631, "logits/rejected": -2.7029523849487305, "logps/chosen": -277.0049133300781, "logps/rejected": -285.37152099609375, "loss": 0.5943, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.15238934755325317, "rewards/margins": 1.0189682245254517, "rewards/rejected": -0.8665788769721985, "step": 1030 }, { "epoch": 0.25, "learning_rate": 4.170008019246191e-07, "logits/chosen": -2.7127845287323, "logits/rejected": -2.739548444747925, "logps/chosen": -241.86032104492188, "logps/rejected": -307.84698486328125, "loss": 0.6209, "rewards/accuracies": 0.75, "rewards/chosen": 0.2921208441257477, "rewards/margins": 1.2693531513214111, "rewards/rejected": -0.9772324562072754, "step": 1040 }, { "epoch": 0.25, "learning_rate": 4.210104250200481e-07, "logits/chosen": -2.957655906677246, "logits/rejected": -2.835099458694458, "logps/chosen": -259.3416748046875, "logps/rejected": -221.20620727539062, "loss": 0.7344, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.10282768309116364, "rewards/margins": 0.7172152400016785, "rewards/rejected": -0.614387571811676, "step": 1050 }, { "epoch": 0.26, "learning_rate": 4.2502004811547716e-07, "logits/chosen": -2.872497797012329, "logits/rejected": -2.7167131900787354, "logps/chosen": -209.87173461914062, "logps/rejected": -172.0841522216797, "loss": 0.586, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0810152068734169, "rewards/margins": 1.754184365272522, "rewards/rejected": -1.8351995944976807, "step": 1060 }, { "epoch": 0.26, "learning_rate": 4.2902967121090617e-07, "logits/chosen": -2.8070569038391113, "logits/rejected": -2.742454767227173, "logps/chosen": -148.70144653320312, "logps/rejected": -207.06668090820312, "loss": 0.5861, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.250434547662735, "rewards/margins": 0.7638150453567505, "rewards/rejected": -1.014249563217163, "step": 1070 }, { "epoch": 0.26, "learning_rate": 4.330392943063352e-07, "logits/chosen": -2.9118402004241943, "logits/rejected": -2.755366325378418, "logps/chosen": -277.2066345214844, "logps/rejected": -326.04217529296875, "loss": 0.6995, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.11643274873495102, "rewards/margins": 1.0064040422439575, "rewards/rejected": -0.8899710774421692, "step": 1080 }, { "epoch": 0.26, "learning_rate": 4.370489174017642e-07, "logits/chosen": -2.7893195152282715, "logits/rejected": -2.785935878753662, "logps/chosen": -145.8661651611328, "logps/rejected": -232.83871459960938, "loss": 0.7166, "rewards/accuracies": 0.75, "rewards/chosen": -0.47274595499038696, "rewards/margins": 1.0937156677246094, "rewards/rejected": -1.5664615631103516, "step": 1090 }, { "epoch": 0.26, "learning_rate": 4.4105854049719326e-07, "logits/chosen": -2.636298418045044, "logits/rejected": -2.638779401779175, "logps/chosen": -279.6634521484375, "logps/rejected": -199.56356811523438, "loss": 0.6493, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5719181299209595, "rewards/margins": 0.5789961814880371, "rewards/rejected": -1.1509143114089966, "step": 1100 }, { "epoch": 0.27, "learning_rate": 4.4506816359262227e-07, "logits/chosen": -2.9740004539489746, "logits/rejected": -2.8613905906677246, "logps/chosen": -353.7268981933594, "logps/rejected": -265.4058532714844, "loss": 0.4656, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07540614902973175, "rewards/margins": 1.0848721265792847, "rewards/rejected": -1.1602783203125, "step": 1110 }, { "epoch": 0.27, "learning_rate": 4.490777866880513e-07, "logits/chosen": -3.0363729000091553, "logits/rejected": -2.765496015548706, "logps/chosen": -289.9078369140625, "logps/rejected": -216.5182647705078, "loss": 0.6634, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.1546178013086319, "rewards/margins": 1.15028977394104, "rewards/rejected": -0.9956720471382141, "step": 1120 }, { "epoch": 0.27, "learning_rate": 4.530874097834803e-07, "logits/chosen": -2.84161639213562, "logits/rejected": -2.7532410621643066, "logps/chosen": -203.11619567871094, "logps/rejected": -198.95468139648438, "loss": 0.5293, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.09384959191083908, "rewards/margins": 1.0006788969039917, "rewards/rejected": -0.9068293571472168, "step": 1130 }, { "epoch": 0.27, "learning_rate": 4.570970328789094e-07, "logits/chosen": -2.73176908493042, "logits/rejected": -2.7268660068511963, "logps/chosen": -284.72882080078125, "logps/rejected": -385.8221435546875, "loss": 0.6296, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.8947023153305054, "rewards/margins": 2.0185418128967285, "rewards/rejected": -2.9132442474365234, "step": 1140 }, { "epoch": 0.28, "learning_rate": 4.611066559743384e-07, "logits/chosen": -2.810541868209839, "logits/rejected": -2.868605136871338, "logps/chosen": -280.7793273925781, "logps/rejected": -270.59173583984375, "loss": 0.4983, "rewards/accuracies": 0.75, "rewards/chosen": -0.2828827202320099, "rewards/margins": 0.7669601440429688, "rewards/rejected": -1.0498428344726562, "step": 1150 }, { "epoch": 0.28, "learning_rate": 4.6511627906976743e-07, "logits/chosen": -2.673184871673584, "logits/rejected": -2.787496328353882, "logps/chosen": -229.1334228515625, "logps/rejected": -247.783935546875, "loss": 0.546, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19098523259162903, "rewards/margins": 0.7509520649909973, "rewards/rejected": -0.9419373273849487, "step": 1160 }, { "epoch": 0.28, "learning_rate": 4.6912590216519644e-07, "logits/chosen": -2.7520358562469482, "logits/rejected": -2.7745821475982666, "logps/chosen": -254.341796875, "logps/rejected": -270.19219970703125, "loss": 0.6116, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.44708842039108276, "rewards/margins": 0.5681658983230591, "rewards/rejected": -1.0152543783187866, "step": 1170 }, { "epoch": 0.28, "learning_rate": 4.731355252606255e-07, "logits/chosen": -2.7063660621643066, "logits/rejected": -2.681487560272217, "logps/chosen": -252.1110076904297, "logps/rejected": -236.2973175048828, "loss": 0.6036, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.22551283240318298, "rewards/margins": 1.2284777164459229, "rewards/rejected": -1.002964735031128, "step": 1180 }, { "epoch": 0.29, "learning_rate": 4.771451483560545e-07, "logits/chosen": -2.7218141555786133, "logits/rejected": -2.686691999435425, "logps/chosen": -270.92645263671875, "logps/rejected": -258.6405029296875, "loss": 0.4865, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.3326466977596283, "rewards/margins": 1.375840425491333, "rewards/rejected": -1.0431935787200928, "step": 1190 }, { "epoch": 0.29, "learning_rate": 4.811547714514836e-07, "logits/chosen": -2.9489059448242188, "logits/rejected": -2.862583637237549, "logps/chosen": -250.0316619873047, "logps/rejected": -228.2823944091797, "loss": 0.5728, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3286787271499634, "rewards/margins": 0.5910458564758301, "rewards/rejected": -0.9197246432304382, "step": 1200 }, { "epoch": 0.29, "eval_logits/chosen": -2.505753993988037, "eval_logits/rejected": -2.4770805835723877, "eval_logps/chosen": -209.3626708984375, "eval_logps/rejected": -207.41122436523438, "eval_loss": 0.5323905944824219, "eval_rewards/accuracies": 0.6424999833106995, "eval_rewards/chosen": -0.7435135841369629, "eval_rewards/margins": 1.0445295572280884, "eval_rewards/rejected": -1.7880432605743408, "eval_runtime": 131.8001, "eval_samples_per_second": 23.945, "eval_steps_per_second": 0.379, "step": 1200 }, { "epoch": 0.29, "learning_rate": 4.851643945469126e-07, "logits/chosen": -2.7622861862182617, "logits/rejected": -2.752068281173706, "logps/chosen": -209.67770385742188, "logps/rejected": -191.7538604736328, "loss": 0.5126, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07353958487510681, "rewards/margins": 1.6461225748062134, "rewards/rejected": -1.719662070274353, "step": 1210 }, { "epoch": 0.29, "learning_rate": 4.891740176423416e-07, "logits/chosen": -2.607412815093994, "logits/rejected": -2.6985273361206055, "logps/chosen": -273.7203063964844, "logps/rejected": -259.5631103515625, "loss": 0.5619, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.542596697807312, "rewards/margins": 1.319077491760254, "rewards/rejected": -1.8616740703582764, "step": 1220 }, { "epoch": 0.3, "learning_rate": 4.931836407377706e-07, "logits/chosen": -2.8004584312438965, "logits/rejected": -2.67830491065979, "logps/chosen": -290.6675109863281, "logps/rejected": -207.16372680664062, "loss": 0.5771, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5204219818115234, "rewards/margins": 1.4301979541778564, "rewards/rejected": -1.9506199359893799, "step": 1230 }, { "epoch": 0.3, "learning_rate": 4.971932638331996e-07, "logits/chosen": -2.8502564430236816, "logits/rejected": -2.75368070602417, "logps/chosen": -262.37274169921875, "logps/rejected": -303.19281005859375, "loss": 0.5791, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.9246293902397156, "rewards/margins": 0.9244276285171509, "rewards/rejected": -1.8490569591522217, "step": 1240 }, { "epoch": 0.3, "learning_rate": 4.998662863255482e-07, "logits/chosen": -2.893864154815674, "logits/rejected": -2.782992362976074, "logps/chosen": -311.4219055175781, "logps/rejected": -214.0582275390625, "loss": 0.7277, "rewards/accuracies": 0.75, "rewards/chosen": -0.8336235284805298, "rewards/margins": 1.3086597919464111, "rewards/rejected": -2.1422832012176514, "step": 1250 }, { "epoch": 0.3, "learning_rate": 4.994205740773756e-07, "logits/chosen": -2.7738232612609863, "logits/rejected": -2.7849669456481934, "logps/chosen": -237.98391723632812, "logps/rejected": -249.3274383544922, "loss": 0.5599, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.3302724361419678, "rewards/margins": 0.6252075433731079, "rewards/rejected": -1.9554798603057861, "step": 1260 }, { "epoch": 0.31, "learning_rate": 4.989748618292031e-07, "logits/chosen": -2.7021219730377197, "logits/rejected": -2.466907024383545, "logps/chosen": -318.3983154296875, "logps/rejected": -291.46417236328125, "loss": 0.5715, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.451961636543274, "rewards/margins": 1.726300835609436, "rewards/rejected": -3.178262233734131, "step": 1270 }, { "epoch": 0.31, "learning_rate": 4.985291495810304e-07, "logits/chosen": -2.810925006866455, "logits/rejected": -2.7793993949890137, "logps/chosen": -438.08441162109375, "logps/rejected": -412.2474060058594, "loss": 0.4904, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7998260855674744, "rewards/margins": 1.8961460590362549, "rewards/rejected": -2.695971965789795, "step": 1280 }, { "epoch": 0.31, "learning_rate": 4.980834373328579e-07, "logits/chosen": -2.717757225036621, "logits/rejected": -2.6055784225463867, "logps/chosen": -280.00042724609375, "logps/rejected": -252.255859375, "loss": 0.5581, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6964629888534546, "rewards/margins": 1.0425831079483032, "rewards/rejected": -1.7390460968017578, "step": 1290 }, { "epoch": 0.31, "learning_rate": 4.976377250846854e-07, "logits/chosen": -2.8040218353271484, "logits/rejected": -2.7467641830444336, "logps/chosen": -267.19720458984375, "logps/rejected": -313.90496826171875, "loss": 0.8625, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.043508004397153854, "rewards/margins": 1.1382510662078857, "rewards/rejected": -1.1817591190338135, "step": 1300 }, { "epoch": 0.32, "learning_rate": 4.971920128365127e-07, "logits/chosen": -2.809016466140747, "logits/rejected": -2.7497928142547607, "logps/chosen": -343.20196533203125, "logps/rejected": -262.23370361328125, "loss": 0.612, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.9264835119247437, "rewards/margins": 1.130632758140564, "rewards/rejected": -2.0571162700653076, "step": 1310 }, { "epoch": 0.32, "learning_rate": 4.967463005883402e-07, "logits/chosen": -2.731447696685791, "logits/rejected": -2.6999869346618652, "logps/chosen": -314.8204345703125, "logps/rejected": -280.8206787109375, "loss": 0.5644, "rewards/accuracies": 0.75, "rewards/chosen": -0.8434036374092102, "rewards/margins": 0.6722986102104187, "rewards/rejected": -1.515702247619629, "step": 1320 }, { "epoch": 0.32, "learning_rate": 4.963005883401676e-07, "logits/chosen": -2.5341479778289795, "logits/rejected": -2.5332157611846924, "logps/chosen": -202.1023406982422, "logps/rejected": -190.7056427001953, "loss": 0.6538, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0355693101882935, "rewards/margins": 0.49540454149246216, "rewards/rejected": -1.5309737920761108, "step": 1330 }, { "epoch": 0.32, "learning_rate": 4.95854876091995e-07, "logits/chosen": -2.5441746711730957, "logits/rejected": -2.4266505241394043, "logps/chosen": -311.6667785644531, "logps/rejected": -295.31048583984375, "loss": 0.6216, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6037508845329285, "rewards/margins": 1.1581532955169678, "rewards/rejected": -1.7619041204452515, "step": 1340 }, { "epoch": 0.32, "learning_rate": 4.954091638438224e-07, "logits/chosen": -2.677034854888916, "logits/rejected": -2.6806418895721436, "logps/chosen": -245.2216796875, "logps/rejected": -239.7243194580078, "loss": 0.5543, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7556982040405273, "rewards/margins": 1.2748024463653564, "rewards/rejected": -3.030500888824463, "step": 1350 }, { "epoch": 0.33, "learning_rate": 4.949634515956499e-07, "logits/chosen": -2.53664493560791, "logits/rejected": -2.3591296672821045, "logps/chosen": -243.69992065429688, "logps/rejected": -233.404296875, "loss": 0.4621, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.5793843269348145, "rewards/margins": 1.578896164894104, "rewards/rejected": -4.158279895782471, "step": 1360 }, { "epoch": 0.33, "learning_rate": 4.945177393474772e-07, "logits/chosen": -2.6049957275390625, "logits/rejected": -2.430155038833618, "logps/chosen": -348.60540771484375, "logps/rejected": -263.60772705078125, "loss": 0.4201, "rewards/accuracies": 0.75, "rewards/chosen": -1.364793300628662, "rewards/margins": 1.9968388080596924, "rewards/rejected": -3.3616321086883545, "step": 1370 }, { "epoch": 0.33, "learning_rate": 4.940720270993047e-07, "logits/chosen": -2.5204038619995117, "logits/rejected": -2.5334765911102295, "logps/chosen": -231.40945434570312, "logps/rejected": -271.99517822265625, "loss": 0.5314, "rewards/accuracies": 0.75, "rewards/chosen": -1.6743671894073486, "rewards/margins": 0.9893819689750671, "rewards/rejected": -2.6637492179870605, "step": 1380 }, { "epoch": 0.33, "learning_rate": 4.936263148511321e-07, "logits/chosen": -2.5044212341308594, "logits/rejected": -2.675902843475342, "logps/chosen": -305.47100830078125, "logps/rejected": -262.70404052734375, "loss": 0.6683, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.6784005165100098, "rewards/margins": 0.4521772265434265, "rewards/rejected": -3.130577802658081, "step": 1390 }, { "epoch": 0.34, "learning_rate": 4.931806026029595e-07, "logits/chosen": -2.485682964324951, "logits/rejected": -2.54756498336792, "logps/chosen": -285.7750244140625, "logps/rejected": -288.01324462890625, "loss": 0.665, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9433006048202515, "rewards/margins": 1.605025053024292, "rewards/rejected": -2.548325777053833, "step": 1400 }, { "epoch": 0.34, "learning_rate": 4.927348903547869e-07, "logits/chosen": -2.837623357772827, "logits/rejected": -2.789177656173706, "logps/chosen": -302.69189453125, "logps/rejected": -281.81427001953125, "loss": 0.5974, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -1.31369948387146, "rewards/margins": 0.359649121761322, "rewards/rejected": -1.6733486652374268, "step": 1410 }, { "epoch": 0.34, "learning_rate": 4.922891781066144e-07, "logits/chosen": -2.6415786743164062, "logits/rejected": -2.67337703704834, "logps/chosen": -215.93118286132812, "logps/rejected": -224.37893676757812, "loss": 0.5327, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.8797400593757629, "rewards/margins": 1.2374986410140991, "rewards/rejected": -2.117238759994507, "step": 1420 }, { "epoch": 0.34, "learning_rate": 4.918434658584418e-07, "logits/chosen": -2.727658748626709, "logits/rejected": -2.574336051940918, "logps/chosen": -404.3485412597656, "logps/rejected": -287.9634094238281, "loss": 0.5279, "rewards/accuracies": 0.75, "rewards/chosen": -1.3983969688415527, "rewards/margins": 1.1075738668441772, "rewards/rejected": -2.5059709548950195, "step": 1430 }, { "epoch": 0.35, "learning_rate": 4.913977536102692e-07, "logits/chosen": -2.5600686073303223, "logits/rejected": -2.516930103302002, "logps/chosen": -232.35513305664062, "logps/rejected": -251.1455535888672, "loss": 0.5859, "rewards/accuracies": 0.75, "rewards/chosen": -0.8366307020187378, "rewards/margins": 1.2499377727508545, "rewards/rejected": -2.0865683555603027, "step": 1440 }, { "epoch": 0.35, "learning_rate": 4.909520413620967e-07, "logits/chosen": -2.480833053588867, "logits/rejected": -2.467857599258423, "logps/chosen": -271.05987548828125, "logps/rejected": -275.52978515625, "loss": 0.5631, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.09460250288248062, "rewards/margins": 1.4983643293380737, "rewards/rejected": -1.4037621021270752, "step": 1450 }, { "epoch": 0.35, "learning_rate": 4.90506329113924e-07, "logits/chosen": -2.670718193054199, "logits/rejected": -2.590294361114502, "logps/chosen": -323.92510986328125, "logps/rejected": -307.1265869140625, "loss": 0.5922, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5208776593208313, "rewards/margins": 1.4484494924545288, "rewards/rejected": -1.9693269729614258, "step": 1460 }, { "epoch": 0.35, "learning_rate": 4.900606168657515e-07, "logits/chosen": -2.4288439750671387, "logits/rejected": -2.445039987564087, "logps/chosen": -283.4742431640625, "logps/rejected": -322.4749755859375, "loss": 0.6183, "rewards/accuracies": 0.75, "rewards/chosen": -1.4125239849090576, "rewards/margins": 1.4633347988128662, "rewards/rejected": -2.875858783721924, "step": 1470 }, { "epoch": 0.36, "learning_rate": 4.896149046175789e-07, "logits/chosen": -2.7901694774627686, "logits/rejected": -2.541984796524048, "logps/chosen": -308.7502136230469, "logps/rejected": -289.9494934082031, "loss": 0.6705, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.36407652497291565, "rewards/margins": 1.3868237733840942, "rewards/rejected": -1.7509002685546875, "step": 1480 }, { "epoch": 0.36, "learning_rate": 4.891691923694063e-07, "logits/chosen": -2.732682704925537, "logits/rejected": -2.7537662982940674, "logps/chosen": -336.2591857910156, "logps/rejected": -362.84039306640625, "loss": 0.5476, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.7052796483039856, "rewards/margins": 1.150768518447876, "rewards/rejected": -1.8560482263565063, "step": 1490 }, { "epoch": 0.36, "learning_rate": 4.887234801212337e-07, "logits/chosen": -2.4859511852264404, "logits/rejected": -2.400803565979004, "logps/chosen": -212.07174682617188, "logps/rejected": -222.74081420898438, "loss": 0.7352, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.265228509902954, "rewards/margins": 0.8272930383682251, "rewards/rejected": -2.0925214290618896, "step": 1500 }, { "epoch": 0.36, "learning_rate": 4.882777678730611e-07, "logits/chosen": -2.7351162433624268, "logits/rejected": -2.7322583198547363, "logps/chosen": -275.1295471191406, "logps/rejected": -260.4400634765625, "loss": 0.6142, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14362294971942902, "rewards/margins": 1.4526232481002808, "rewards/rejected": -1.596246361732483, "step": 1510 }, { "epoch": 0.37, "learning_rate": 4.878320556248885e-07, "logits/chosen": -2.7501864433288574, "logits/rejected": -2.702101230621338, "logps/chosen": -233.5178985595703, "logps/rejected": -226.6597137451172, "loss": 0.5753, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.8510408401489258, "rewards/margins": 0.6280291676521301, "rewards/rejected": -1.4790699481964111, "step": 1520 }, { "epoch": 0.37, "learning_rate": 4.87386343376716e-07, "logits/chosen": -2.802588701248169, "logits/rejected": -2.6868271827697754, "logps/chosen": -242.33499145507812, "logps/rejected": -192.27235412597656, "loss": 0.4946, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.8795742988586426, "rewards/margins": 0.8144742846488953, "rewards/rejected": -1.694048523902893, "step": 1530 }, { "epoch": 0.37, "learning_rate": 4.869406311285433e-07, "logits/chosen": -2.6833994388580322, "logits/rejected": -2.850263833999634, "logps/chosen": -232.25537109375, "logps/rejected": -290.7073974609375, "loss": 0.5636, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9992902874946594, "rewards/margins": 0.7843549847602844, "rewards/rejected": -1.7836453914642334, "step": 1540 }, { "epoch": 0.37, "learning_rate": 4.864949188803708e-07, "logits/chosen": -2.731792688369751, "logits/rejected": -2.669426441192627, "logps/chosen": -272.5797424316406, "logps/rejected": -257.44805908203125, "loss": 0.5954, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6422157883644104, "rewards/margins": 1.3242002725601196, "rewards/rejected": -1.9664160013198853, "step": 1550 }, { "epoch": 0.38, "learning_rate": 4.860492066321983e-07, "logits/chosen": -2.6287994384765625, "logits/rejected": -2.6140027046203613, "logps/chosen": -208.34054565429688, "logps/rejected": -211.9888153076172, "loss": 0.5959, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.674298882484436, "rewards/margins": 0.8545886874198914, "rewards/rejected": -2.5288877487182617, "step": 1560 }, { "epoch": 0.38, "learning_rate": 4.856034943840256e-07, "logits/chosen": -2.547799825668335, "logits/rejected": -2.57979154586792, "logps/chosen": -285.1658630371094, "logps/rejected": -372.6661071777344, "loss": 0.6426, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.1689493656158447, "rewards/margins": 0.5165742635726929, "rewards/rejected": -1.6855236291885376, "step": 1570 }, { "epoch": 0.38, "learning_rate": 4.851577821358531e-07, "logits/chosen": -2.5380287170410156, "logits/rejected": -2.440687894821167, "logps/chosen": -205.4988555908203, "logps/rejected": -277.82818603515625, "loss": 0.5209, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0934346914291382, "rewards/margins": 2.067070484161377, "rewards/rejected": -3.1605048179626465, "step": 1580 }, { "epoch": 0.38, "learning_rate": 4.847120698876805e-07, "logits/chosen": -2.5879967212677, "logits/rejected": -2.610872268676758, "logps/chosen": -254.39285278320312, "logps/rejected": -231.7747344970703, "loss": 0.6221, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5625192523002625, "rewards/margins": 1.6473850011825562, "rewards/rejected": -2.2099039554595947, "step": 1590 }, { "epoch": 0.39, "learning_rate": 4.842663576395079e-07, "logits/chosen": -2.5071847438812256, "logits/rejected": -2.4455130100250244, "logps/chosen": -236.2644805908203, "logps/rejected": -242.8035125732422, "loss": 0.7378, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.3004558086395264, "rewards/margins": 1.0624239444732666, "rewards/rejected": -2.362879514694214, "step": 1600 }, { "epoch": 0.39, "eval_logits/chosen": -2.452479600906372, "eval_logits/rejected": -2.423743486404419, "eval_logps/chosen": -218.31736755371094, "eval_logps/rejected": -218.83828735351562, "eval_loss": 0.5212501883506775, "eval_rewards/accuracies": 0.6650000214576721, "eval_rewards/chosen": -1.6389820575714111, "eval_rewards/margins": 1.2917686700820923, "eval_rewards/rejected": -2.930750846862793, "eval_runtime": 132.2425, "eval_samples_per_second": 23.865, "eval_steps_per_second": 0.378, "step": 1600 }, { "epoch": 0.39, "learning_rate": 4.838206453913353e-07, "logits/chosen": -2.909114122390747, "logits/rejected": -2.7687737941741943, "logps/chosen": -298.7687683105469, "logps/rejected": -277.6521301269531, "loss": 0.5541, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.6247934103012085, "rewards/margins": 0.7123693227767944, "rewards/rejected": -2.337162733078003, "step": 1610 }, { "epoch": 0.39, "learning_rate": 4.833749331431628e-07, "logits/chosen": -2.768028497695923, "logits/rejected": -2.6386196613311768, "logps/chosen": -237.07260131835938, "logps/rejected": -174.32333374023438, "loss": 0.5605, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0496273040771484, "rewards/margins": 1.1658755540847778, "rewards/rejected": -2.215503215789795, "step": 1620 }, { "epoch": 0.39, "learning_rate": 4.829292208949901e-07, "logits/chosen": -2.676975965499878, "logits/rejected": -2.677523612976074, "logps/chosen": -228.9412078857422, "logps/rejected": -234.9801788330078, "loss": 0.5557, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.3257936239242554, "rewards/margins": 0.8237881660461426, "rewards/rejected": -2.1495819091796875, "step": 1630 }, { "epoch": 0.39, "learning_rate": 4.824835086468176e-07, "logits/chosen": -2.5803751945495605, "logits/rejected": -2.5565459728240967, "logps/chosen": -263.369384765625, "logps/rejected": -232.2301025390625, "loss": 0.5663, "rewards/accuracies": 0.75, "rewards/chosen": -0.6426874399185181, "rewards/margins": 1.2953565120697021, "rewards/rejected": -1.9380439519882202, "step": 1640 }, { "epoch": 0.4, "learning_rate": 4.82037796398645e-07, "logits/chosen": -2.593459367752075, "logits/rejected": -2.6587584018707275, "logps/chosen": -128.81832885742188, "logps/rejected": -164.3067169189453, "loss": 0.5158, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4609547555446625, "rewards/margins": 1.3585450649261475, "rewards/rejected": -1.8194997310638428, "step": 1650 }, { "epoch": 0.4, "learning_rate": 4.815920841504724e-07, "logits/chosen": -2.5619707107543945, "logits/rejected": -2.4942357540130615, "logps/chosen": -210.661865234375, "logps/rejected": -316.95404052734375, "loss": 0.6839, "rewards/accuracies": 0.75, "rewards/chosen": -0.6866597533226013, "rewards/margins": 1.1276859045028687, "rewards/rejected": -1.8143457174301147, "step": 1660 }, { "epoch": 0.4, "learning_rate": 4.811463719022998e-07, "logits/chosen": -2.5982155799865723, "logits/rejected": -2.50154447555542, "logps/chosen": -353.1432189941406, "logps/rejected": -342.078125, "loss": 0.6005, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9795411825180054, "rewards/margins": 0.43017083406448364, "rewards/rejected": -2.409712314605713, "step": 1670 }, { "epoch": 0.4, "learning_rate": 4.807006596541273e-07, "logits/chosen": -2.520991086959839, "logits/rejected": -2.565416097640991, "logps/chosen": -164.8997039794922, "logps/rejected": -187.4937286376953, "loss": 0.576, "rewards/accuracies": 0.75, "rewards/chosen": -0.5802173018455505, "rewards/margins": 0.9242179989814758, "rewards/rejected": -1.5044353008270264, "step": 1680 }, { "epoch": 0.41, "learning_rate": 4.802549474059546e-07, "logits/chosen": -2.5648231506347656, "logits/rejected": -2.5870213508605957, "logps/chosen": -176.57669067382812, "logps/rejected": -212.6230010986328, "loss": 0.6139, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2881603538990021, "rewards/margins": 1.1466704607009888, "rewards/rejected": -1.434830904006958, "step": 1690 }, { "epoch": 0.41, "learning_rate": 4.798092351577821e-07, "logits/chosen": -2.745094060897827, "logits/rejected": -2.6509552001953125, "logps/chosen": -312.0198669433594, "logps/rejected": -291.9124755859375, "loss": 0.5733, "rewards/accuracies": 0.75, "rewards/chosen": -1.5168651342391968, "rewards/margins": 0.7483233213424683, "rewards/rejected": -2.265188455581665, "step": 1700 }, { "epoch": 0.41, "learning_rate": 4.793635229096096e-07, "logits/chosen": -2.6192471981048584, "logits/rejected": -2.6161856651306152, "logps/chosen": -229.7698516845703, "logps/rejected": -230.5714111328125, "loss": 0.4965, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.7330110669136047, "rewards/margins": 0.8800470232963562, "rewards/rejected": -1.613058090209961, "step": 1710 }, { "epoch": 0.41, "learning_rate": 4.789178106614369e-07, "logits/chosen": -2.5570099353790283, "logits/rejected": -2.6017849445343018, "logps/chosen": -223.7205047607422, "logps/rejected": -221.4395294189453, "loss": 0.4851, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1352211236953735, "rewards/margins": 1.2103431224822998, "rewards/rejected": -2.345564365386963, "step": 1720 }, { "epoch": 0.42, "learning_rate": 4.784720984132644e-07, "logits/chosen": -2.5730772018432617, "logits/rejected": -2.583688735961914, "logps/chosen": -199.51190185546875, "logps/rejected": -205.45703125, "loss": 0.5504, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.759188711643219, "rewards/margins": 1.5073745250701904, "rewards/rejected": -2.2665631771087646, "step": 1730 }, { "epoch": 0.42, "learning_rate": 4.780263861650918e-07, "logits/chosen": -2.744879722595215, "logits/rejected": -2.656869888305664, "logps/chosen": -243.7637481689453, "logps/rejected": -241.1142578125, "loss": 0.5155, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.3867835998535156, "rewards/margins": 0.6124585866928101, "rewards/rejected": -1.9992421865463257, "step": 1740 }, { "epoch": 0.42, "learning_rate": 4.775806739169192e-07, "logits/chosen": -2.6300344467163086, "logits/rejected": -2.5715746879577637, "logps/chosen": -302.39447021484375, "logps/rejected": -271.0872497558594, "loss": 0.5327, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.7226763963699341, "rewards/margins": 1.8533964157104492, "rewards/rejected": -2.5760726928710938, "step": 1750 }, { "epoch": 0.42, "learning_rate": 4.771349616687466e-07, "logits/chosen": -2.661221981048584, "logits/rejected": -2.6473538875579834, "logps/chosen": -361.41229248046875, "logps/rejected": -344.7325134277344, "loss": 0.4577, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5991467237472534, "rewards/margins": 1.644186019897461, "rewards/rejected": -2.243332624435425, "step": 1760 }, { "epoch": 0.43, "learning_rate": 4.7668924942057403e-07, "logits/chosen": -2.6630051136016846, "logits/rejected": -2.716240406036377, "logps/chosen": -316.25604248046875, "logps/rejected": -263.24285888671875, "loss": 0.5225, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3840251863002777, "rewards/margins": 1.877362847328186, "rewards/rejected": -2.261387825012207, "step": 1770 }, { "epoch": 0.43, "learning_rate": 4.7624353717240143e-07, "logits/chosen": -2.6680986881256104, "logits/rejected": -2.696406364440918, "logps/chosen": -168.38442993164062, "logps/rejected": -248.2466583251953, "loss": 0.5948, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7542056441307068, "rewards/margins": 1.5609080791473389, "rewards/rejected": -2.3151137828826904, "step": 1780 }, { "epoch": 0.43, "learning_rate": 4.757978249242289e-07, "logits/chosen": -2.7498373985290527, "logits/rejected": -2.681201696395874, "logps/chosen": -416.79718017578125, "logps/rejected": -327.98785400390625, "loss": 0.6477, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.382917582988739, "rewards/margins": 0.13252462446689606, "rewards/rejected": -0.5154422521591187, "step": 1790 }, { "epoch": 0.43, "learning_rate": 4.753521126760563e-07, "logits/chosen": -2.6633782386779785, "logits/rejected": -2.591684103012085, "logps/chosen": -246.0820770263672, "logps/rejected": -177.46151733398438, "loss": 0.5584, "rewards/accuracies": 0.75, "rewards/chosen": 0.04203291982412338, "rewards/margins": 0.8560365438461304, "rewards/rejected": -0.8140036463737488, "step": 1800 }, { "epoch": 0.44, "learning_rate": 4.749064004278837e-07, "logits/chosen": -2.725090980529785, "logits/rejected": -2.6943840980529785, "logps/chosen": -340.26446533203125, "logps/rejected": -339.68243408203125, "loss": 0.4855, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3830810487270355, "rewards/margins": 1.02919602394104, "rewards/rejected": -1.4122769832611084, "step": 1810 }, { "epoch": 0.44, "learning_rate": 4.7446068817971115e-07, "logits/chosen": -2.5046231746673584, "logits/rejected": -2.5039780139923096, "logps/chosen": -250.435546875, "logps/rejected": -284.41912841796875, "loss": 0.5042, "rewards/accuracies": 0.5, "rewards/chosen": -0.7816805243492126, "rewards/margins": 0.39527979493141174, "rewards/rejected": -1.1769602298736572, "step": 1820 }, { "epoch": 0.44, "learning_rate": 4.7401497593153855e-07, "logits/chosen": -2.597954273223877, "logits/rejected": -2.5391862392425537, "logps/chosen": -291.6734313964844, "logps/rejected": -251.46337890625, "loss": 0.6315, "rewards/accuracies": 0.75, "rewards/chosen": -0.6559056639671326, "rewards/margins": 1.4835684299468994, "rewards/rejected": -2.1394739151000977, "step": 1830 }, { "epoch": 0.44, "learning_rate": 4.7356926368336596e-07, "logits/chosen": -2.6696648597717285, "logits/rejected": -2.668829917907715, "logps/chosen": -183.03819274902344, "logps/rejected": -201.14859008789062, "loss": 0.62, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.9224878549575806, "rewards/margins": 0.998814582824707, "rewards/rejected": -1.9213024377822876, "step": 1840 }, { "epoch": 0.45, "learning_rate": 4.731235514351934e-07, "logits/chosen": -2.6900339126586914, "logits/rejected": -2.670243263244629, "logps/chosen": -224.8537139892578, "logps/rejected": -237.53518676757812, "loss": 0.6317, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.1691210269927979, "rewards/margins": 0.45907098054885864, "rewards/rejected": -1.6281919479370117, "step": 1850 }, { "epoch": 0.45, "learning_rate": 4.726778391870208e-07, "logits/chosen": -2.5648159980773926, "logits/rejected": -2.470302104949951, "logps/chosen": -237.4529266357422, "logps/rejected": -268.68359375, "loss": 0.5522, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.4968430995941162, "rewards/margins": 1.7916736602783203, "rewards/rejected": -2.2885167598724365, "step": 1860 }, { "epoch": 0.45, "learning_rate": 4.7223212693884827e-07, "logits/chosen": -2.6383121013641357, "logits/rejected": -2.6759159564971924, "logps/chosen": -189.407958984375, "logps/rejected": -209.6025390625, "loss": 0.5415, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.683796763420105, "rewards/margins": 1.0064215660095215, "rewards/rejected": -1.6902183294296265, "step": 1870 }, { "epoch": 0.45, "learning_rate": 4.7178641469067573e-07, "logits/chosen": -2.578706979751587, "logits/rejected": -2.5927414894104004, "logps/chosen": -236.52951049804688, "logps/rejected": -260.4093933105469, "loss": 0.5788, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6235929727554321, "rewards/margins": 0.8559904098510742, "rewards/rejected": -1.479583501815796, "step": 1880 }, { "epoch": 0.45, "learning_rate": 4.7134070244250313e-07, "logits/chosen": -2.6419436931610107, "logits/rejected": -2.5469255447387695, "logps/chosen": -281.54901123046875, "logps/rejected": -227.68453979492188, "loss": 0.5113, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8387514352798462, "rewards/margins": 1.2184150218963623, "rewards/rejected": -2.057166337966919, "step": 1890 }, { "epoch": 0.46, "learning_rate": 4.7089499019433053e-07, "logits/chosen": -2.676638126373291, "logits/rejected": -2.459538459777832, "logps/chosen": -295.4486389160156, "logps/rejected": -287.54119873046875, "loss": 0.5771, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.9650393724441528, "rewards/margins": 1.7956863641738892, "rewards/rejected": -2.760725498199463, "step": 1900 }, { "epoch": 0.46, "learning_rate": 4.70449277946158e-07, "logits/chosen": -2.6179847717285156, "logits/rejected": -2.6216704845428467, "logps/chosen": -258.857177734375, "logps/rejected": -261.48638916015625, "loss": 0.5117, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.7386892437934875, "rewards/margins": 0.7860111594200134, "rewards/rejected": -1.524700403213501, "step": 1910 }, { "epoch": 0.46, "learning_rate": 4.700035656979854e-07, "logits/chosen": -2.4984524250030518, "logits/rejected": -2.5021138191223145, "logps/chosen": -280.54449462890625, "logps/rejected": -229.6431121826172, "loss": 0.7432, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.6804414391517639, "rewards/margins": 0.9229552149772644, "rewards/rejected": -1.6033966541290283, "step": 1920 }, { "epoch": 0.46, "learning_rate": 4.695578534498128e-07, "logits/chosen": -2.5745809078216553, "logits/rejected": -2.5078747272491455, "logps/chosen": -272.56683349609375, "logps/rejected": -302.67041015625, "loss": 0.5336, "rewards/accuracies": 0.75, "rewards/chosen": -0.08346471190452576, "rewards/margins": 1.2559754848480225, "rewards/rejected": -1.3394403457641602, "step": 1930 }, { "epoch": 0.47, "learning_rate": 4.691121412016402e-07, "logits/chosen": -2.747448205947876, "logits/rejected": -2.6069045066833496, "logps/chosen": -237.12060546875, "logps/rejected": -253.00033569335938, "loss": 0.4895, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3581007421016693, "rewards/margins": 1.0566563606262207, "rewards/rejected": -1.4147570133209229, "step": 1940 }, { "epoch": 0.47, "learning_rate": 4.6866642895346765e-07, "logits/chosen": -2.6678454875946045, "logits/rejected": -2.5825304985046387, "logps/chosen": -202.19448852539062, "logps/rejected": -212.3162384033203, "loss": 0.5431, "rewards/accuracies": 0.75, "rewards/chosen": -1.2410422563552856, "rewards/margins": 1.332786202430725, "rewards/rejected": -2.57382869720459, "step": 1950 }, { "epoch": 0.47, "learning_rate": 4.6822071670529506e-07, "logits/chosen": -2.569096803665161, "logits/rejected": -2.4656262397766113, "logps/chosen": -274.194580078125, "logps/rejected": -203.11862182617188, "loss": 0.5308, "rewards/accuracies": 0.75, "rewards/chosen": -1.0657583475112915, "rewards/margins": 1.6224472522735596, "rewards/rejected": -2.6882054805755615, "step": 1960 }, { "epoch": 0.47, "learning_rate": 4.6777500445712246e-07, "logits/chosen": -2.5886142253875732, "logits/rejected": -2.4780044555664062, "logps/chosen": -217.68875122070312, "logps/rejected": -191.03097534179688, "loss": 0.5237, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.506932020187378, "rewards/margins": 0.9141137003898621, "rewards/rejected": -2.4210457801818848, "step": 1970 }, { "epoch": 0.48, "learning_rate": 4.673292922089499e-07, "logits/chosen": -2.664307117462158, "logits/rejected": -2.741649866104126, "logps/chosen": -227.2781982421875, "logps/rejected": -243.3974609375, "loss": 0.735, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9387329816818237, "rewards/margins": 1.4798619747161865, "rewards/rejected": -2.4185948371887207, "step": 1980 }, { "epoch": 0.48, "learning_rate": 4.668835799607773e-07, "logits/chosen": -2.511784076690674, "logits/rejected": -2.568660020828247, "logps/chosen": -251.56588745117188, "logps/rejected": -237.3352813720703, "loss": 0.5213, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.559882402420044, "rewards/margins": 1.6572726964950562, "rewards/rejected": -3.2171554565429688, "step": 1990 }, { "epoch": 0.48, "learning_rate": 4.664378677126047e-07, "logits/chosen": -2.5120930671691895, "logits/rejected": -2.4584739208221436, "logps/chosen": -405.72674560546875, "logps/rejected": -305.15484619140625, "loss": 0.7467, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2938706874847412, "rewards/margins": 1.316383957862854, "rewards/rejected": -2.6102547645568848, "step": 2000 }, { "epoch": 0.48, "eval_logits/chosen": -2.444129705429077, "eval_logits/rejected": -2.410576820373535, "eval_logps/chosen": -224.02638244628906, "eval_logps/rejected": -223.77809143066406, "eval_loss": 0.5787909626960754, "eval_rewards/accuracies": 0.6575000286102295, "eval_rewards/chosen": -2.209883213043213, "eval_rewards/margins": 1.2148469686508179, "eval_rewards/rejected": -3.4247303009033203, "eval_runtime": 131.4945, "eval_samples_per_second": 24.001, "eval_steps_per_second": 0.38, "step": 2000 }, { "epoch": 0.48, "learning_rate": 4.659921554644322e-07, "logits/chosen": -2.6425423622131348, "logits/rejected": -2.6530404090881348, "logps/chosen": -226.0823211669922, "logps/rejected": -147.59510803222656, "loss": 0.5758, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9719376564025879, "rewards/margins": 1.2038328647613525, "rewards/rejected": -2.1757702827453613, "step": 2010 }, { "epoch": 0.49, "learning_rate": 4.655464432162596e-07, "logits/chosen": -2.753293991088867, "logits/rejected": -2.5513646602630615, "logps/chosen": -277.55706787109375, "logps/rejected": -226.85574340820312, "loss": 0.5395, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.108198642730713, "rewards/margins": 1.5984714031219482, "rewards/rejected": -2.706670045852661, "step": 2020 }, { "epoch": 0.49, "learning_rate": 4.65100730968087e-07, "logits/chosen": -2.6071271896362305, "logits/rejected": -2.6852943897247314, "logps/chosen": -255.02090454101562, "logps/rejected": -245.03759765625, "loss": 0.5993, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.428769588470459, "rewards/margins": 1.5921337604522705, "rewards/rejected": -3.0209031105041504, "step": 2030 }, { "epoch": 0.49, "learning_rate": 4.6465501871991444e-07, "logits/chosen": -2.7704215049743652, "logits/rejected": -2.6319820880889893, "logps/chosen": -280.4167175292969, "logps/rejected": -228.3472900390625, "loss": 0.646, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.04085111618042, "rewards/margins": 1.1138665676116943, "rewards/rejected": -2.1547179222106934, "step": 2040 }, { "epoch": 0.49, "learning_rate": 4.6420930647174184e-07, "logits/chosen": -2.737074375152588, "logits/rejected": -2.7383949756622314, "logps/chosen": -272.1175231933594, "logps/rejected": -234.6374053955078, "loss": 0.5736, "rewards/accuracies": 0.75, "rewards/chosen": -1.098402500152588, "rewards/margins": 0.6982945203781128, "rewards/rejected": -1.7966970205307007, "step": 2050 }, { "epoch": 0.5, "learning_rate": 4.6376359422356924e-07, "logits/chosen": -2.6888365745544434, "logits/rejected": -2.6673827171325684, "logps/chosen": -280.63128662109375, "logps/rejected": -310.49908447265625, "loss": 0.5724, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.3983144760131836, "rewards/margins": 0.6582023501396179, "rewards/rejected": -2.0565168857574463, "step": 2060 }, { "epoch": 0.5, "learning_rate": 4.633178819753967e-07, "logits/chosen": -2.8046553134918213, "logits/rejected": -2.784102439880371, "logps/chosen": -249.0012664794922, "logps/rejected": -285.3121337890625, "loss": 0.5372, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.148695468902588, "rewards/margins": 1.1670068502426147, "rewards/rejected": -2.3157026767730713, "step": 2070 }, { "epoch": 0.5, "learning_rate": 4.628721697272241e-07, "logits/chosen": -2.5930941104888916, "logits/rejected": -2.52286958694458, "logps/chosen": -249.0924835205078, "logps/rejected": -229.8943328857422, "loss": 0.8827, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.8799583911895752, "rewards/margins": 1.608770728111267, "rewards/rejected": -3.488729476928711, "step": 2080 }, { "epoch": 0.5, "learning_rate": 4.624264574790515e-07, "logits/chosen": -2.737677574157715, "logits/rejected": -2.589430332183838, "logps/chosen": -261.4295654296875, "logps/rejected": -243.833251953125, "loss": 0.5408, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.307201862335205, "rewards/margins": 1.4561229944229126, "rewards/rejected": -2.7633252143859863, "step": 2090 }, { "epoch": 0.51, "learning_rate": 4.619807452308789e-07, "logits/chosen": -2.608551502227783, "logits/rejected": -2.5399093627929688, "logps/chosen": -303.45208740234375, "logps/rejected": -263.0534973144531, "loss": 0.5456, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5837156772613525, "rewards/margins": 1.0279721021652222, "rewards/rejected": -2.6116878986358643, "step": 2100 }, { "epoch": 0.51, "learning_rate": 4.6153503298270636e-07, "logits/chosen": -2.611738443374634, "logits/rejected": -2.6551427841186523, "logps/chosen": -306.83026123046875, "logps/rejected": -350.64312744140625, "loss": 1.1069, "rewards/accuracies": 0.75, "rewards/chosen": -0.8476687669754028, "rewards/margins": 1.386994481086731, "rewards/rejected": -2.2346630096435547, "step": 2110 }, { "epoch": 0.51, "learning_rate": 4.6108932073453377e-07, "logits/chosen": -2.672241687774658, "logits/rejected": -2.5617220401763916, "logps/chosen": -382.13458251953125, "logps/rejected": -303.9514465332031, "loss": 0.5562, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.8237565755844116, "rewards/margins": 0.574665904045105, "rewards/rejected": -2.3984227180480957, "step": 2120 }, { "epoch": 0.51, "learning_rate": 4.6064360848636117e-07, "logits/chosen": -2.686922550201416, "logits/rejected": -2.598443031311035, "logps/chosen": -272.69525146484375, "logps/rejected": -339.1822204589844, "loss": 0.613, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4758816957473755, "rewards/margins": 0.8712828755378723, "rewards/rejected": -2.3471646308898926, "step": 2130 }, { "epoch": 0.52, "learning_rate": 4.601978962381886e-07, "logits/chosen": -2.5901780128479004, "logits/rejected": -2.5972065925598145, "logps/chosen": -253.97372436523438, "logps/rejected": -263.4020080566406, "loss": 0.4924, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.192458152770996, "rewards/margins": 0.78331458568573, "rewards/rejected": -1.9757726192474365, "step": 2140 }, { "epoch": 0.52, "learning_rate": 4.5975218399001603e-07, "logits/chosen": -2.484832286834717, "logits/rejected": -2.5052075386047363, "logps/chosen": -261.0078430175781, "logps/rejected": -297.71820068359375, "loss": 0.6843, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.141125440597534, "rewards/margins": 0.289185494184494, "rewards/rejected": -2.4303107261657715, "step": 2150 }, { "epoch": 0.52, "learning_rate": 4.5930647174184343e-07, "logits/chosen": -2.613530397415161, "logits/rejected": -2.641186475753784, "logps/chosen": -268.1658935546875, "logps/rejected": -272.3964538574219, "loss": 0.4863, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6121963262557983, "rewards/margins": 1.899646520614624, "rewards/rejected": -2.511842727661133, "step": 2160 }, { "epoch": 0.52, "learning_rate": 4.588607594936709e-07, "logits/chosen": -2.601377487182617, "logits/rejected": -2.5596752166748047, "logps/chosen": -237.232421875, "logps/rejected": -274.5160827636719, "loss": 0.6326, "rewards/accuracies": 0.5, "rewards/chosen": -1.6485188007354736, "rewards/margins": 0.939397931098938, "rewards/rejected": -2.587916612625122, "step": 2170 }, { "epoch": 0.52, "learning_rate": 4.584150472454983e-07, "logits/chosen": -2.469336986541748, "logits/rejected": -2.4013397693634033, "logps/chosen": -185.54568481445312, "logps/rejected": -176.74929809570312, "loss": 0.5309, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.1838127374649048, "rewards/margins": 1.5106571912765503, "rewards/rejected": -2.694469928741455, "step": 2180 }, { "epoch": 0.53, "learning_rate": 4.579693349973257e-07, "logits/chosen": -2.5632500648498535, "logits/rejected": -2.574326515197754, "logps/chosen": -185.436279296875, "logps/rejected": -207.86325073242188, "loss": 0.6808, "rewards/accuracies": 0.75, "rewards/chosen": -1.0079572200775146, "rewards/margins": 1.917676329612732, "rewards/rejected": -2.925633192062378, "step": 2190 }, { "epoch": 0.53, "learning_rate": 4.5752362274915315e-07, "logits/chosen": -2.6746857166290283, "logits/rejected": -2.6083481311798096, "logps/chosen": -223.94442749023438, "logps/rejected": -205.3218536376953, "loss": 0.4763, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9791423678398132, "rewards/margins": 1.406191110610962, "rewards/rejected": -2.38533353805542, "step": 2200 }, { "epoch": 0.53, "learning_rate": 4.5707791050098055e-07, "logits/chosen": -2.6126599311828613, "logits/rejected": -2.6058077812194824, "logps/chosen": -301.7843322753906, "logps/rejected": -257.8680725097656, "loss": 0.6266, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1785779446363449, "rewards/margins": 1.7274401187896729, "rewards/rejected": -1.9060180187225342, "step": 2210 }, { "epoch": 0.53, "learning_rate": 4.5663219825280795e-07, "logits/chosen": -2.6819117069244385, "logits/rejected": -2.62992525100708, "logps/chosen": -245.77481079101562, "logps/rejected": -219.6219482421875, "loss": 0.7883, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.7871536016464233, "rewards/margins": 0.5845667123794556, "rewards/rejected": -1.371720552444458, "step": 2220 }, { "epoch": 0.54, "learning_rate": 4.561864860046354e-07, "logits/chosen": -2.6489009857177734, "logits/rejected": -2.623767852783203, "logps/chosen": -212.5379180908203, "logps/rejected": -164.32310485839844, "loss": 0.6103, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.6828866600990295, "rewards/margins": 0.5492819547653198, "rewards/rejected": -1.232168436050415, "step": 2230 }, { "epoch": 0.54, "learning_rate": 4.557407737564628e-07, "logits/chosen": -2.5661842823028564, "logits/rejected": -2.5723376274108887, "logps/chosen": -206.6239776611328, "logps/rejected": -214.72195434570312, "loss": 0.462, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9324319958686829, "rewards/margins": 1.2257258892059326, "rewards/rejected": -2.1581578254699707, "step": 2240 }, { "epoch": 0.54, "learning_rate": 4.552950615082902e-07, "logits/chosen": -2.4822638034820557, "logits/rejected": -2.4895377159118652, "logps/chosen": -242.5191650390625, "logps/rejected": -286.22686767578125, "loss": 0.6882, "rewards/accuracies": 0.75, "rewards/chosen": -0.7670876383781433, "rewards/margins": 1.1064833402633667, "rewards/rejected": -1.8735707998275757, "step": 2250 }, { "epoch": 0.54, "learning_rate": 4.548493492601176e-07, "logits/chosen": -2.8437228202819824, "logits/rejected": -2.701927661895752, "logps/chosen": -276.03564453125, "logps/rejected": -298.08099365234375, "loss": 0.5096, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7756320238113403, "rewards/margins": 2.3020224571228027, "rewards/rejected": -3.0776543617248535, "step": 2260 }, { "epoch": 0.55, "learning_rate": 4.544036370119451e-07, "logits/chosen": -2.841212749481201, "logits/rejected": -2.7204411029815674, "logps/chosen": -428.544189453125, "logps/rejected": -347.85723876953125, "loss": 0.4426, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.2579714059829712, "rewards/margins": 1.2345914840698242, "rewards/rejected": -2.492562770843506, "step": 2270 }, { "epoch": 0.55, "learning_rate": 4.539579247637725e-07, "logits/chosen": -2.5310349464416504, "logits/rejected": -2.4994027614593506, "logps/chosen": -274.86663818359375, "logps/rejected": -238.88623046875, "loss": 0.8814, "rewards/accuracies": 0.75, "rewards/chosen": -0.258170485496521, "rewards/margins": 1.6624419689178467, "rewards/rejected": -1.9206125736236572, "step": 2280 }, { "epoch": 0.55, "learning_rate": 4.535122125155999e-07, "logits/chosen": -2.3999197483062744, "logits/rejected": -2.255828380584717, "logps/chosen": -351.51837158203125, "logps/rejected": -316.54791259765625, "loss": 0.9701, "rewards/accuracies": 0.75, "rewards/chosen": -1.1201560497283936, "rewards/margins": 1.114150047302246, "rewards/rejected": -2.2343058586120605, "step": 2290 }, { "epoch": 0.55, "learning_rate": 4.5306650026742734e-07, "logits/chosen": -2.7634806632995605, "logits/rejected": -2.5712170600891113, "logps/chosen": -252.85025024414062, "logps/rejected": -197.05996704101562, "loss": 0.4855, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.760390043258667, "rewards/margins": 1.4791388511657715, "rewards/rejected": -3.2395293712615967, "step": 2300 }, { "epoch": 0.56, "learning_rate": 4.5262078801925474e-07, "logits/chosen": -2.6070544719696045, "logits/rejected": -2.566847562789917, "logps/chosen": -208.426025390625, "logps/rejected": -206.1570281982422, "loss": 0.5186, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.680463194847107, "rewards/margins": 1.101180911064148, "rewards/rejected": -2.781644344329834, "step": 2310 }, { "epoch": 0.56, "learning_rate": 4.5217507577108214e-07, "logits/chosen": -2.776313066482544, "logits/rejected": -2.7298378944396973, "logps/chosen": -214.5133056640625, "logps/rejected": -248.0509033203125, "loss": 0.5608, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9462732076644897, "rewards/margins": 1.8917815685272217, "rewards/rejected": -2.83805513381958, "step": 2320 }, { "epoch": 0.56, "learning_rate": 4.517293635229096e-07, "logits/chosen": -2.7545807361602783, "logits/rejected": -2.642467498779297, "logps/chosen": -243.28292846679688, "logps/rejected": -211.8591766357422, "loss": 0.7106, "rewards/accuracies": 0.75, "rewards/chosen": -0.5913723111152649, "rewards/margins": 1.7651437520980835, "rewards/rejected": -2.3565163612365723, "step": 2330 }, { "epoch": 0.56, "learning_rate": 4.51283651274737e-07, "logits/chosen": -2.4426515102386475, "logits/rejected": -2.401933193206787, "logps/chosen": -229.60049438476562, "logps/rejected": -246.3610382080078, "loss": 0.6211, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.2280629873275757, "rewards/margins": 3.3658013343811035, "rewards/rejected": -4.5938639640808105, "step": 2340 }, { "epoch": 0.57, "learning_rate": 4.508379390265644e-07, "logits/chosen": -2.621298313140869, "logits/rejected": -2.545727491378784, "logps/chosen": -201.02578735351562, "logps/rejected": -177.73696899414062, "loss": 0.5226, "rewards/accuracies": 0.75, "rewards/chosen": -1.283278465270996, "rewards/margins": 1.3395992517471313, "rewards/rejected": -2.622877597808838, "step": 2350 }, { "epoch": 0.57, "learning_rate": 4.5039222677839186e-07, "logits/chosen": -2.681291341781616, "logits/rejected": -2.6904656887054443, "logps/chosen": -342.61358642578125, "logps/rejected": -312.4060974121094, "loss": 0.4658, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.7542943954467773, "rewards/margins": 0.9285749197006226, "rewards/rejected": -2.6828696727752686, "step": 2360 }, { "epoch": 0.57, "learning_rate": 4.4994651453021926e-07, "logits/chosen": -2.5337376594543457, "logits/rejected": -2.5319621562957764, "logps/chosen": -237.2696075439453, "logps/rejected": -248.96102905273438, "loss": 0.4649, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.1634007692337036, "rewards/margins": 2.1607351303100586, "rewards/rejected": -3.3241360187530518, "step": 2370 }, { "epoch": 0.57, "learning_rate": 4.4950080228204666e-07, "logits/chosen": -2.6067795753479004, "logits/rejected": -2.628153085708618, "logps/chosen": -334.35369873046875, "logps/rejected": -355.157470703125, "loss": 0.5153, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9229193925857544, "rewards/margins": 2.240133047103882, "rewards/rejected": -3.1630523204803467, "step": 2380 }, { "epoch": 0.58, "learning_rate": 4.490550900338741e-07, "logits/chosen": -2.74542498588562, "logits/rejected": -2.5125811100006104, "logps/chosen": -236.3998565673828, "logps/rejected": -233.5565643310547, "loss": 0.4774, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.290185570716858, "rewards/margins": 1.9261367321014404, "rewards/rejected": -3.216322422027588, "step": 2390 }, { "epoch": 0.58, "learning_rate": 4.486093777857015e-07, "logits/chosen": -2.5763707160949707, "logits/rejected": -2.471097469329834, "logps/chosen": -193.45892333984375, "logps/rejected": -204.922607421875, "loss": 0.4646, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.373281478881836, "rewards/margins": 2.0415844917297363, "rewards/rejected": -3.4148662090301514, "step": 2400 }, { "epoch": 0.58, "eval_logits/chosen": -2.3994462490081787, "eval_logits/rejected": -2.3682971000671387, "eval_logps/chosen": -213.28709411621094, "eval_logps/rejected": -216.52792358398438, "eval_loss": 0.5308729410171509, "eval_rewards/accuracies": 0.6499999761581421, "eval_rewards/chosen": -1.1359529495239258, "eval_rewards/margins": 1.5637621879577637, "eval_rewards/rejected": -2.6997153759002686, "eval_runtime": 133.6166, "eval_samples_per_second": 23.62, "eval_steps_per_second": 0.374, "step": 2400 }, { "epoch": 0.58, "learning_rate": 4.481636655375289e-07, "logits/chosen": -2.729184627532959, "logits/rejected": -2.6641573905944824, "logps/chosen": -295.86700439453125, "logps/rejected": -331.05535888671875, "loss": 0.6653, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6393815279006958, "rewards/margins": 1.4067280292510986, "rewards/rejected": -2.046109676361084, "step": 2410 }, { "epoch": 0.58, "learning_rate": 4.4771795328935633e-07, "logits/chosen": -2.7169833183288574, "logits/rejected": -2.603271484375, "logps/chosen": -278.91632080078125, "logps/rejected": -227.692138671875, "loss": 0.7691, "rewards/accuracies": 0.75, "rewards/chosen": -0.35997486114501953, "rewards/margins": 1.3083598613739014, "rewards/rejected": -1.6683346033096313, "step": 2420 }, { "epoch": 0.58, "learning_rate": 4.472722410411838e-07, "logits/chosen": -2.6175537109375, "logits/rejected": -2.6558189392089844, "logps/chosen": -192.9453125, "logps/rejected": -223.7886199951172, "loss": 0.4445, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15358349680900574, "rewards/margins": 2.531790256500244, "rewards/rejected": -2.6853737831115723, "step": 2430 }, { "epoch": 0.59, "learning_rate": 4.468265287930112e-07, "logits/chosen": -2.7276058197021484, "logits/rejected": -2.6745474338531494, "logps/chosen": -276.5566711425781, "logps/rejected": -318.369873046875, "loss": 0.4895, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.425123929977417, "rewards/margins": 0.6360111832618713, "rewards/rejected": -2.0611350536346436, "step": 2440 }, { "epoch": 0.59, "learning_rate": 4.463808165448386e-07, "logits/chosen": -2.7597084045410156, "logits/rejected": -2.6362688541412354, "logps/chosen": -294.7146301269531, "logps/rejected": -250.98165893554688, "loss": 0.4817, "rewards/accuracies": 0.75, "rewards/chosen": -1.0659924745559692, "rewards/margins": 1.2486473321914673, "rewards/rejected": -2.3146398067474365, "step": 2450 }, { "epoch": 0.59, "learning_rate": 4.4593510429666605e-07, "logits/chosen": -2.6981377601623535, "logits/rejected": -2.7366175651550293, "logps/chosen": -265.9452819824219, "logps/rejected": -277.0174865722656, "loss": 0.548, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3343164920806885, "rewards/margins": 1.3074525594711304, "rewards/rejected": -2.6417689323425293, "step": 2460 }, { "epoch": 0.59, "learning_rate": 4.4548939204849345e-07, "logits/chosen": -2.667603015899658, "logits/rejected": -2.713460683822632, "logps/chosen": -264.29034423828125, "logps/rejected": -252.31436157226562, "loss": 0.5833, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.8343324661254883, "rewards/margins": 0.6806478500366211, "rewards/rejected": -2.5149803161621094, "step": 2470 }, { "epoch": 0.6, "learning_rate": 4.4504367980032085e-07, "logits/chosen": -2.7401413917541504, "logits/rejected": -2.7046878337860107, "logps/chosen": -321.14385986328125, "logps/rejected": -301.818603515625, "loss": 0.6218, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9978463053703308, "rewards/margins": 1.4452444314956665, "rewards/rejected": -2.4430909156799316, "step": 2480 }, { "epoch": 0.6, "learning_rate": 4.445979675521483e-07, "logits/chosen": -2.6353251934051514, "logits/rejected": -2.392544984817505, "logps/chosen": -313.06915283203125, "logps/rejected": -195.7866668701172, "loss": 0.5543, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.3248649835586548, "rewards/margins": 1.5439631938934326, "rewards/rejected": -2.868828058242798, "step": 2490 }, { "epoch": 0.6, "learning_rate": 4.441522553039757e-07, "logits/chosen": -2.6359786987304688, "logits/rejected": -2.4890024662017822, "logps/chosen": -260.76361083984375, "logps/rejected": -262.39825439453125, "loss": 0.6148, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.1930229663848877, "rewards/margins": 0.27814993262290955, "rewards/rejected": -2.471173048019409, "step": 2500 }, { "epoch": 0.6, "learning_rate": 4.437065430558031e-07, "logits/chosen": -2.6569018363952637, "logits/rejected": -2.6371681690216064, "logps/chosen": -291.4471740722656, "logps/rejected": -333.3192138671875, "loss": 0.6698, "rewards/accuracies": 0.75, "rewards/chosen": -1.289780855178833, "rewards/margins": 1.4741872549057007, "rewards/rejected": -2.763967990875244, "step": 2510 }, { "epoch": 0.61, "learning_rate": 4.4326083080763057e-07, "logits/chosen": -2.5294575691223145, "logits/rejected": -2.5719335079193115, "logps/chosen": -228.34793090820312, "logps/rejected": -250.3420867919922, "loss": 0.4181, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.4886128902435303, "rewards/margins": 2.112856149673462, "rewards/rejected": -3.6014695167541504, "step": 2520 }, { "epoch": 0.61, "learning_rate": 4.4281511855945797e-07, "logits/chosen": -2.682654857635498, "logits/rejected": -2.4943184852600098, "logps/chosen": -200.63919067382812, "logps/rejected": -166.19149780273438, "loss": 0.5266, "rewards/accuracies": 0.75, "rewards/chosen": -1.6488895416259766, "rewards/margins": 1.4922336339950562, "rewards/rejected": -3.141123056411743, "step": 2530 }, { "epoch": 0.61, "learning_rate": 4.423694063112854e-07, "logits/chosen": -2.7275164127349854, "logits/rejected": -2.684523582458496, "logps/chosen": -190.6834716796875, "logps/rejected": -216.94180297851562, "loss": 0.5218, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.3709272146224976, "rewards/margins": 1.007057547569275, "rewards/rejected": -2.3779845237731934, "step": 2540 }, { "epoch": 0.61, "learning_rate": 4.419236940631129e-07, "logits/chosen": -2.5892624855041504, "logits/rejected": -2.5918118953704834, "logps/chosen": -211.045654296875, "logps/rejected": -239.8479766845703, "loss": 0.653, "rewards/accuracies": 0.75, "rewards/chosen": -1.7834230661392212, "rewards/margins": 1.6225885152816772, "rewards/rejected": -3.4060111045837402, "step": 2550 }, { "epoch": 0.62, "learning_rate": 4.414779818149403e-07, "logits/chosen": -2.734849452972412, "logits/rejected": -2.667412519454956, "logps/chosen": -257.10894775390625, "logps/rejected": -244.238525390625, "loss": 0.54, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.6349313259124756, "rewards/margins": 2.199925184249878, "rewards/rejected": -3.8348567485809326, "step": 2560 }, { "epoch": 0.62, "learning_rate": 4.410322695667677e-07, "logits/chosen": -2.6233067512512207, "logits/rejected": -2.5422592163085938, "logps/chosen": -269.6959228515625, "logps/rejected": -316.9816589355469, "loss": 0.5798, "rewards/accuracies": 0.75, "rewards/chosen": -2.6098742485046387, "rewards/margins": 1.4767448902130127, "rewards/rejected": -4.0866193771362305, "step": 2570 }, { "epoch": 0.62, "learning_rate": 4.4058655731859515e-07, "logits/chosen": -2.4794352054595947, "logits/rejected": -2.428112745285034, "logps/chosen": -286.3292236328125, "logps/rejected": -268.1867980957031, "loss": 0.4741, "rewards/accuracies": 0.75, "rewards/chosen": -1.4831401109695435, "rewards/margins": 0.8532403111457825, "rewards/rejected": -2.3363804817199707, "step": 2580 }, { "epoch": 0.62, "learning_rate": 4.4014084507042255e-07, "logits/chosen": -2.5975468158721924, "logits/rejected": -2.4987661838531494, "logps/chosen": -272.4716491699219, "logps/rejected": -291.02294921875, "loss": 0.5666, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.058257818222046, "rewards/margins": 1.1403725147247314, "rewards/rejected": -3.1986308097839355, "step": 2590 }, { "epoch": 0.63, "learning_rate": 4.3969513282224995e-07, "logits/chosen": -2.677935838699341, "logits/rejected": -2.5781166553497314, "logps/chosen": -238.8215789794922, "logps/rejected": -212.71969604492188, "loss": 0.556, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4124119281768799, "rewards/margins": 1.2017902135849, "rewards/rejected": -2.6142020225524902, "step": 2600 }, { "epoch": 0.63, "learning_rate": 4.3924942057407735e-07, "logits/chosen": -2.680534839630127, "logits/rejected": -2.5556912422180176, "logps/chosen": -263.3976745605469, "logps/rejected": -285.26348876953125, "loss": 0.6082, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.5407580137252808, "rewards/margins": 1.0123125314712524, "rewards/rejected": -2.553070545196533, "step": 2610 }, { "epoch": 0.63, "learning_rate": 4.388037083259048e-07, "logits/chosen": -2.614470958709717, "logits/rejected": -2.510915517807007, "logps/chosen": -390.08685302734375, "logps/rejected": -334.0655822753906, "loss": 0.5706, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.8687372207641602, "rewards/margins": 1.8866193294525146, "rewards/rejected": -3.755356550216675, "step": 2620 }, { "epoch": 0.63, "learning_rate": 4.383579960777322e-07, "logits/chosen": -2.533573627471924, "logits/rejected": -2.6349072456359863, "logps/chosen": -263.72540283203125, "logps/rejected": -261.92388916015625, "loss": 0.5274, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.517315149307251, "rewards/margins": 1.144661784172058, "rewards/rejected": -3.6619770526885986, "step": 2630 }, { "epoch": 0.64, "learning_rate": 4.379122838295596e-07, "logits/chosen": -2.620600461959839, "logits/rejected": -2.5007054805755615, "logps/chosen": -364.69366455078125, "logps/rejected": -349.27996826171875, "loss": 0.5159, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1532578468322754, "rewards/margins": 1.53290593624115, "rewards/rejected": -3.686164140701294, "step": 2640 }, { "epoch": 0.64, "learning_rate": 4.3746657158138707e-07, "logits/chosen": -2.3059136867523193, "logits/rejected": -2.1973063945770264, "logps/chosen": -226.91806030273438, "logps/rejected": -202.16848754882812, "loss": 0.634, "rewards/accuracies": 0.5, "rewards/chosen": -2.9682247638702393, "rewards/margins": 0.32376235723495483, "rewards/rejected": -3.291987180709839, "step": 2650 }, { "epoch": 0.64, "learning_rate": 4.370208593332145e-07, "logits/chosen": -2.5784239768981934, "logits/rejected": -2.463409185409546, "logps/chosen": -331.9124755859375, "logps/rejected": -249.6974334716797, "loss": 0.5939, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.851837158203125, "rewards/margins": 1.636743187904358, "rewards/rejected": -3.4885807037353516, "step": 2660 }, { "epoch": 0.64, "learning_rate": 4.365751470850419e-07, "logits/chosen": -2.7009811401367188, "logits/rejected": -2.597710132598877, "logps/chosen": -294.39959716796875, "logps/rejected": -238.6754913330078, "loss": 0.5219, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5454216003417969, "rewards/margins": 1.1799639463424683, "rewards/rejected": -2.7253854274749756, "step": 2670 }, { "epoch": 0.65, "learning_rate": 4.3612943483686933e-07, "logits/chosen": -2.5441927909851074, "logits/rejected": -2.569514513015747, "logps/chosen": -254.97341918945312, "logps/rejected": -258.8553161621094, "loss": 0.5772, "rewards/accuracies": 0.5, "rewards/chosen": -2.0851924419403076, "rewards/margins": 1.121181845664978, "rewards/rejected": -3.206374406814575, "step": 2680 }, { "epoch": 0.65, "learning_rate": 4.3568372258869674e-07, "logits/chosen": -2.5438809394836426, "logits/rejected": -2.5418128967285156, "logps/chosen": -240.10855102539062, "logps/rejected": -264.3448791503906, "loss": 0.6832, "rewards/accuracies": 0.75, "rewards/chosen": -1.7431503534317017, "rewards/margins": 1.5108040571212769, "rewards/rejected": -3.2539544105529785, "step": 2690 }, { "epoch": 0.65, "learning_rate": 4.3523801034052414e-07, "logits/chosen": -2.629892110824585, "logits/rejected": -2.612617015838623, "logps/chosen": -258.5018005371094, "logps/rejected": -254.3931427001953, "loss": 0.5957, "rewards/accuracies": 0.5, "rewards/chosen": -1.77963125705719, "rewards/margins": 0.42840784788131714, "rewards/rejected": -2.2080390453338623, "step": 2700 }, { "epoch": 0.65, "learning_rate": 4.347922980923516e-07, "logits/chosen": -2.6034722328186035, "logits/rejected": -2.6657822132110596, "logps/chosen": -238.00399780273438, "logps/rejected": -297.07891845703125, "loss": 0.5985, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.7608715295791626, "rewards/margins": 1.8040387630462646, "rewards/rejected": -3.5649101734161377, "step": 2710 }, { "epoch": 0.65, "learning_rate": 4.34346585844179e-07, "logits/chosen": -2.7711234092712402, "logits/rejected": -2.6151115894317627, "logps/chosen": -228.28500366210938, "logps/rejected": -203.56143188476562, "loss": 0.6575, "rewards/accuracies": 0.75, "rewards/chosen": -1.4607006311416626, "rewards/margins": 1.8166983127593994, "rewards/rejected": -3.2773985862731934, "step": 2720 }, { "epoch": 0.66, "learning_rate": 4.339008735960064e-07, "logits/chosen": -2.811764717102051, "logits/rejected": -2.6760783195495605, "logps/chosen": -369.1084289550781, "logps/rejected": -295.4292907714844, "loss": 0.569, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.6835203170776367, "rewards/margins": 0.6058062314987183, "rewards/rejected": -2.2893266677856445, "step": 2730 }, { "epoch": 0.66, "learning_rate": 4.3345516134783386e-07, "logits/chosen": -2.595508098602295, "logits/rejected": -2.546417236328125, "logps/chosen": -271.2452392578125, "logps/rejected": -238.8561553955078, "loss": 0.5094, "rewards/accuracies": 0.75, "rewards/chosen": -1.9008029699325562, "rewards/margins": 1.1821850538253784, "rewards/rejected": -3.0829882621765137, "step": 2740 }, { "epoch": 0.66, "learning_rate": 4.3300944909966126e-07, "logits/chosen": -2.4520745277404785, "logits/rejected": -2.415116786956787, "logps/chosen": -362.1225280761719, "logps/rejected": -327.42095947265625, "loss": 0.5453, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.5479540824890137, "rewards/margins": 0.861763596534729, "rewards/rejected": -3.409717559814453, "step": 2750 }, { "epoch": 0.66, "learning_rate": 4.3256373685148866e-07, "logits/chosen": -2.547978401184082, "logits/rejected": -2.4545705318450928, "logps/chosen": -283.8704528808594, "logps/rejected": -259.1752014160156, "loss": 0.4791, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.748185157775879, "rewards/margins": 1.3087536096572876, "rewards/rejected": -3.056938648223877, "step": 2760 }, { "epoch": 0.67, "learning_rate": 4.3211802460331606e-07, "logits/chosen": -2.5646090507507324, "logits/rejected": -2.663165807723999, "logps/chosen": -294.49261474609375, "logps/rejected": -303.9794006347656, "loss": 0.7898, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.3019287586212158, "rewards/margins": 1.9290939569473267, "rewards/rejected": -3.231022596359253, "step": 2770 }, { "epoch": 0.67, "learning_rate": 4.316723123551435e-07, "logits/chosen": -2.7926788330078125, "logits/rejected": -2.7688939571380615, "logps/chosen": -325.41058349609375, "logps/rejected": -332.62506103515625, "loss": 0.6187, "rewards/accuracies": 0.5, "rewards/chosen": -1.767809510231018, "rewards/margins": 0.045775678008794785, "rewards/rejected": -1.8135855197906494, "step": 2780 }, { "epoch": 0.67, "learning_rate": 4.312266001069709e-07, "logits/chosen": -2.803236484527588, "logits/rejected": -2.774876117706299, "logps/chosen": -264.9578857421875, "logps/rejected": -234.6368408203125, "loss": 0.6107, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.1145288944244385, "rewards/margins": 1.3622523546218872, "rewards/rejected": -2.4767813682556152, "step": 2790 }, { "epoch": 0.67, "learning_rate": 4.307808878587983e-07, "logits/chosen": -2.6970162391662598, "logits/rejected": -2.4754576683044434, "logps/chosen": -282.1333923339844, "logps/rejected": -283.2776794433594, "loss": 0.7454, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.6527990102767944, "rewards/margins": 0.17735615372657776, "rewards/rejected": -1.830155372619629, "step": 2800 }, { "epoch": 0.67, "eval_logits/chosen": -2.4574854373931885, "eval_logits/rejected": -2.4288878440856934, "eval_logps/chosen": -221.9242401123047, "eval_logps/rejected": -225.12472534179688, "eval_loss": 0.5290461778640747, "eval_rewards/accuracies": 0.6700000166893005, "eval_rewards/chosen": -1.999670386314392, "eval_rewards/margins": 1.5597243309020996, "eval_rewards/rejected": -3.559394598007202, "eval_runtime": 131.6609, "eval_samples_per_second": 23.971, "eval_steps_per_second": 0.38, "step": 2800 }, { "epoch": 0.68, "learning_rate": 4.303351756106258e-07, "logits/chosen": -2.6771788597106934, "logits/rejected": -2.6447653770446777, "logps/chosen": -224.6023406982422, "logps/rejected": -263.2858581542969, "loss": 0.5702, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.6448767185211182, "rewards/margins": 1.2766485214233398, "rewards/rejected": -2.921525478363037, "step": 2810 }, { "epoch": 0.68, "learning_rate": 4.298894633624532e-07, "logits/chosen": -2.8259623050689697, "logits/rejected": -2.7117819786071777, "logps/chosen": -342.81005859375, "logps/rejected": -322.091064453125, "loss": 0.5574, "rewards/accuracies": 0.75, "rewards/chosen": -1.029432773590088, "rewards/margins": 2.133007526397705, "rewards/rejected": -3.162440299987793, "step": 2820 }, { "epoch": 0.68, "learning_rate": 4.294437511142806e-07, "logits/chosen": -2.7422261238098145, "logits/rejected": -2.6582720279693604, "logps/chosen": -404.9603271484375, "logps/rejected": -286.91876220703125, "loss": 0.4297, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7746641635894775, "rewards/margins": 2.1579883098602295, "rewards/rejected": -2.932652473449707, "step": 2830 }, { "epoch": 0.68, "learning_rate": 4.2899803886610804e-07, "logits/chosen": -2.8271028995513916, "logits/rejected": -2.699387788772583, "logps/chosen": -324.6036682128906, "logps/rejected": -258.8714599609375, "loss": 0.4797, "rewards/accuracies": 0.75, "rewards/chosen": -0.6421637535095215, "rewards/margins": 1.310378074645996, "rewards/rejected": -1.9525420665740967, "step": 2840 }, { "epoch": 0.69, "learning_rate": 4.2855232661793545e-07, "logits/chosen": -2.7117836475372314, "logits/rejected": -2.7621607780456543, "logps/chosen": -298.1368713378906, "logps/rejected": -316.424560546875, "loss": 0.5862, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.4727377891540527, "rewards/margins": 0.6655889749526978, "rewards/rejected": -3.138327121734619, "step": 2850 }, { "epoch": 0.69, "learning_rate": 4.2810661436976285e-07, "logits/chosen": -2.646487236022949, "logits/rejected": -2.706075668334961, "logps/chosen": -233.93753051757812, "logps/rejected": -271.9765930175781, "loss": 0.5744, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.41381773352622986, "rewards/margins": 1.9162561893463135, "rewards/rejected": -2.3300740718841553, "step": 2860 }, { "epoch": 0.69, "learning_rate": 4.276609021215903e-07, "logits/chosen": -2.6221437454223633, "logits/rejected": -2.539348602294922, "logps/chosen": -216.64389038085938, "logps/rejected": -192.21458435058594, "loss": 0.5586, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.6827747821807861, "rewards/margins": 1.3041735887527466, "rewards/rejected": -2.9869484901428223, "step": 2870 }, { "epoch": 0.69, "learning_rate": 4.272151898734177e-07, "logits/chosen": -2.6150918006896973, "logits/rejected": -2.653275966644287, "logps/chosen": -368.13031005859375, "logps/rejected": -327.22955322265625, "loss": 0.6245, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.465667486190796, "rewards/margins": 1.6559873819351196, "rewards/rejected": -3.121654748916626, "step": 2880 }, { "epoch": 0.7, "learning_rate": 4.267694776252451e-07, "logits/chosen": -2.7571969032287598, "logits/rejected": -2.6127772331237793, "logps/chosen": -249.05978393554688, "logps/rejected": -296.90496826171875, "loss": 0.6755, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.7854106426239014, "rewards/margins": 1.760719656944275, "rewards/rejected": -3.546130418777466, "step": 2890 }, { "epoch": 0.7, "learning_rate": 4.2632376537707257e-07, "logits/chosen": -2.6566245555877686, "logits/rejected": -2.586174488067627, "logps/chosen": -217.7415008544922, "logps/rejected": -230.2128448486328, "loss": 0.57, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9581342935562134, "rewards/margins": 2.0794451236724854, "rewards/rejected": -4.03757905960083, "step": 2900 }, { "epoch": 0.7, "learning_rate": 4.2587805312889997e-07, "logits/chosen": -2.661797285079956, "logits/rejected": -2.78605318069458, "logps/chosen": -210.2810516357422, "logps/rejected": -258.68243408203125, "loss": 0.5689, "rewards/accuracies": 0.75, "rewards/chosen": -1.7073214054107666, "rewards/margins": 1.2735803127288818, "rewards/rejected": -2.9809017181396484, "step": 2910 }, { "epoch": 0.7, "learning_rate": 4.2543234088072737e-07, "logits/chosen": -2.734321117401123, "logits/rejected": -2.6187925338745117, "logps/chosen": -275.55340576171875, "logps/rejected": -231.14389038085938, "loss": 0.6469, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.684971570968628, "rewards/margins": 0.38038399815559387, "rewards/rejected": -2.0653557777404785, "step": 2920 }, { "epoch": 0.71, "learning_rate": 4.249866286325548e-07, "logits/chosen": -2.809257984161377, "logits/rejected": -2.789126396179199, "logps/chosen": -249.81005859375, "logps/rejected": -294.38739013671875, "loss": 0.7303, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1528228521347046, "rewards/margins": 0.6378811001777649, "rewards/rejected": -1.7907040119171143, "step": 2930 }, { "epoch": 0.71, "learning_rate": 4.2454091638438223e-07, "logits/chosen": -2.6959519386291504, "logits/rejected": -2.7930846214294434, "logps/chosen": -261.52392578125, "logps/rejected": -261.2728576660156, "loss": 0.586, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.7664673328399658, "rewards/margins": 1.2213587760925293, "rewards/rejected": -2.987825870513916, "step": 2940 }, { "epoch": 0.71, "learning_rate": 4.2409520413620963e-07, "logits/chosen": -2.8703808784484863, "logits/rejected": -2.840724229812622, "logps/chosen": -292.93695068359375, "logps/rejected": -257.15087890625, "loss": 0.4875, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.9478158950805664, "rewards/margins": 1.4594476222991943, "rewards/rejected": -2.4072635173797607, "step": 2950 }, { "epoch": 0.71, "learning_rate": 4.2364949188803704e-07, "logits/chosen": -2.78837513923645, "logits/rejected": -2.6849160194396973, "logps/chosen": -189.768798828125, "logps/rejected": -236.8516082763672, "loss": 0.4793, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.369784951210022, "rewards/margins": 1.6183780431747437, "rewards/rejected": -2.9881629943847656, "step": 2960 }, { "epoch": 0.71, "learning_rate": 4.232037796398645e-07, "logits/chosen": -2.8257720470428467, "logits/rejected": -2.808422565460205, "logps/chosen": -217.53842163085938, "logps/rejected": -213.1930694580078, "loss": 0.4727, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0007766485214233, "rewards/margins": 1.5476601123809814, "rewards/rejected": -2.5484366416931152, "step": 2970 }, { "epoch": 0.72, "learning_rate": 4.227580673916919e-07, "logits/chosen": -2.770376205444336, "logits/rejected": -2.692963123321533, "logps/chosen": -344.5162048339844, "logps/rejected": -301.2332763671875, "loss": 0.5528, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7139848470687866, "rewards/margins": 2.1029775142669678, "rewards/rejected": -2.816962242126465, "step": 2980 }, { "epoch": 0.72, "learning_rate": 4.223123551435193e-07, "logits/chosen": -2.8515336513519287, "logits/rejected": -2.7625396251678467, "logps/chosen": -228.93893432617188, "logps/rejected": -220.40744018554688, "loss": 0.5718, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.007663369178772, "rewards/margins": 1.4957387447357178, "rewards/rejected": -2.5034019947052, "step": 2990 }, { "epoch": 0.72, "learning_rate": 4.2186664289534675e-07, "logits/chosen": -2.7313835620880127, "logits/rejected": -2.762516975402832, "logps/chosen": -317.2395935058594, "logps/rejected": -308.21319580078125, "loss": 0.5126, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7839268445968628, "rewards/margins": 1.3697669506072998, "rewards/rejected": -2.153693675994873, "step": 3000 }, { "epoch": 0.72, "learning_rate": 4.2142093064717416e-07, "logits/chosen": -2.6275830268859863, "logits/rejected": -2.659775972366333, "logps/chosen": -177.7250213623047, "logps/rejected": -220.8149871826172, "loss": 0.48, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.5103175640106201, "rewards/margins": 2.0506412982940674, "rewards/rejected": -2.5609591007232666, "step": 3010 }, { "epoch": 0.73, "learning_rate": 4.2097521839900156e-07, "logits/chosen": -2.5938849449157715, "logits/rejected": -2.56001353263855, "logps/chosen": -314.12158203125, "logps/rejected": -261.13641357421875, "loss": 0.5094, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.2560573816299438, "rewards/margins": 1.8153836727142334, "rewards/rejected": -3.071441173553467, "step": 3020 }, { "epoch": 0.73, "learning_rate": 4.20529506150829e-07, "logits/chosen": -2.7524189949035645, "logits/rejected": -2.6022703647613525, "logps/chosen": -218.4010772705078, "logps/rejected": -273.52423095703125, "loss": 0.5383, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.0239105224609375, "rewards/margins": 2.7949516773223877, "rewards/rejected": -3.818862199783325, "step": 3030 }, { "epoch": 0.73, "learning_rate": 4.200837939026564e-07, "logits/chosen": -2.6513009071350098, "logits/rejected": -2.6527931690216064, "logps/chosen": -229.96286010742188, "logps/rejected": -232.0447998046875, "loss": 0.6208, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.2891321182250977, "rewards/margins": 1.7754385471343994, "rewards/rejected": -3.064570665359497, "step": 3040 }, { "epoch": 0.73, "learning_rate": 4.196380816544838e-07, "logits/chosen": -2.593024730682373, "logits/rejected": -2.6378841400146484, "logps/chosen": -232.8154754638672, "logps/rejected": -195.8571014404297, "loss": 0.5604, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7388196587562561, "rewards/margins": 1.4008862972259521, "rewards/rejected": -2.1397061347961426, "step": 3050 }, { "epoch": 0.74, "learning_rate": 4.191923694063113e-07, "logits/chosen": -2.851116418838501, "logits/rejected": -2.6879868507385254, "logps/chosen": -299.67425537109375, "logps/rejected": -255.0391082763672, "loss": 0.7818, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.7442834377288818, "rewards/margins": 0.992772102355957, "rewards/rejected": -2.737055540084839, "step": 3060 }, { "epoch": 0.74, "learning_rate": 4.187466571581387e-07, "logits/chosen": -2.6634647846221924, "logits/rejected": -2.553982973098755, "logps/chosen": -279.292236328125, "logps/rejected": -202.8257598876953, "loss": 0.5556, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.6925932168960571, "rewards/margins": 1.1422004699707031, "rewards/rejected": -2.8347935676574707, "step": 3070 }, { "epoch": 0.74, "learning_rate": 4.183009449099661e-07, "logits/chosen": -2.6643576622009277, "logits/rejected": -2.5717310905456543, "logps/chosen": -342.335693359375, "logps/rejected": -294.61810302734375, "loss": 0.5782, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5681596994400024, "rewards/margins": 1.7290674448013306, "rewards/rejected": -3.297227144241333, "step": 3080 }, { "epoch": 0.74, "learning_rate": 4.178552326617935e-07, "logits/chosen": -2.5645246505737305, "logits/rejected": -2.472097873687744, "logps/chosen": -261.25677490234375, "logps/rejected": -253.5023193359375, "loss": 0.4876, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.3006632328033447, "rewards/margins": 2.4436044692993164, "rewards/rejected": -3.7442679405212402, "step": 3090 }, { "epoch": 0.75, "learning_rate": 4.1740952041362094e-07, "logits/chosen": -2.579249143600464, "logits/rejected": -2.4710917472839355, "logps/chosen": -290.3495788574219, "logps/rejected": -304.74017333984375, "loss": 0.5786, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2462234497070312, "rewards/margins": 1.3442294597625732, "rewards/rejected": -3.5904529094696045, "step": 3100 }, { "epoch": 0.75, "learning_rate": 4.1696380816544834e-07, "logits/chosen": -2.6869969367980957, "logits/rejected": -2.743889570236206, "logps/chosen": -307.068115234375, "logps/rejected": -354.55279541015625, "loss": 0.696, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.6236753463745117, "rewards/margins": 1.5922362804412842, "rewards/rejected": -3.215911388397217, "step": 3110 }, { "epoch": 0.75, "learning_rate": 4.1651809591727575e-07, "logits/chosen": -2.67097806930542, "logits/rejected": -2.6003124713897705, "logps/chosen": -201.99729919433594, "logps/rejected": -250.19094848632812, "loss": 0.5922, "rewards/accuracies": 0.75, "rewards/chosen": -1.6611751317977905, "rewards/margins": 1.9977973699569702, "rewards/rejected": -3.6589725017547607, "step": 3120 }, { "epoch": 0.75, "learning_rate": 4.160723836691032e-07, "logits/chosen": -2.574385166168213, "logits/rejected": -2.533609628677368, "logps/chosen": -277.22808837890625, "logps/rejected": -255.15725708007812, "loss": 0.7537, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.093635082244873, "rewards/margins": 1.2647625207901, "rewards/rejected": -3.3583977222442627, "step": 3130 }, { "epoch": 0.76, "learning_rate": 4.156266714209306e-07, "logits/chosen": -2.8209471702575684, "logits/rejected": -2.765892267227173, "logps/chosen": -250.3872528076172, "logps/rejected": -296.6456298828125, "loss": 0.5281, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0076568126678467, "rewards/margins": 1.0997529029846191, "rewards/rejected": -3.107409954071045, "step": 3140 }, { "epoch": 0.76, "learning_rate": 4.15180959172758e-07, "logits/chosen": -2.7392449378967285, "logits/rejected": -2.685570478439331, "logps/chosen": -244.25949096679688, "logps/rejected": -202.15159606933594, "loss": 0.6135, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -2.1826963424682617, "rewards/margins": 0.40876954793930054, "rewards/rejected": -2.591465950012207, "step": 3150 }, { "epoch": 0.76, "learning_rate": 4.1473524692458546e-07, "logits/chosen": -2.697981357574463, "logits/rejected": -2.688508987426758, "logps/chosen": -256.394287109375, "logps/rejected": -239.4425048828125, "loss": 0.4602, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.6544971466064453, "rewards/margins": 0.9632614254951477, "rewards/rejected": -2.6177589893341064, "step": 3160 }, { "epoch": 0.76, "learning_rate": 4.1428953467641287e-07, "logits/chosen": -2.6453068256378174, "logits/rejected": -2.637723445892334, "logps/chosen": -207.82302856445312, "logps/rejected": -190.76217651367188, "loss": 0.5699, "rewards/accuracies": 0.75, "rewards/chosen": -0.8176633715629578, "rewards/margins": 1.8111244440078735, "rewards/rejected": -2.6287875175476074, "step": 3170 }, { "epoch": 0.77, "learning_rate": 4.1384382242824027e-07, "logits/chosen": -2.7648839950561523, "logits/rejected": -2.561379909515381, "logps/chosen": -219.7236785888672, "logps/rejected": -157.59056091308594, "loss": 0.6589, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.0911527872085571, "rewards/margins": 0.5300930738449097, "rewards/rejected": -1.6212456226348877, "step": 3180 }, { "epoch": 0.77, "learning_rate": 4.133981101800677e-07, "logits/chosen": -2.730799436569214, "logits/rejected": -2.71309757232666, "logps/chosen": -291.04742431640625, "logps/rejected": -333.51019287109375, "loss": 0.6767, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6952773332595825, "rewards/margins": 0.9070941805839539, "rewards/rejected": -1.6023715734481812, "step": 3190 }, { "epoch": 0.77, "learning_rate": 4.1295239793189513e-07, "logits/chosen": -2.7877984046936035, "logits/rejected": -2.6307997703552246, "logps/chosen": -241.40847778320312, "logps/rejected": -211.07809448242188, "loss": 0.6092, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.0838617086410522, "rewards/margins": 0.9140681028366089, "rewards/rejected": -1.9979298114776611, "step": 3200 }, { "epoch": 0.77, "eval_logits/chosen": -2.5023696422576904, "eval_logits/rejected": -2.4754655361175537, "eval_logps/chosen": -218.04718017578125, "eval_logps/rejected": -220.98233032226562, "eval_loss": 0.5124280452728271, "eval_rewards/accuracies": 0.6850000023841858, "eval_rewards/chosen": -1.6119625568389893, "eval_rewards/margins": 1.533191204071045, "eval_rewards/rejected": -3.145153760910034, "eval_runtime": 131.8008, "eval_samples_per_second": 23.945, "eval_steps_per_second": 0.379, "step": 3200 }, { "epoch": 0.77, "learning_rate": 4.1250668568372253e-07, "logits/chosen": -2.6086478233337402, "logits/rejected": -2.5804197788238525, "logps/chosen": -203.43679809570312, "logps/rejected": -236.58547973632812, "loss": 0.5141, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.9587739109992981, "rewards/margins": 1.6325676441192627, "rewards/rejected": -2.591341733932495, "step": 3210 }, { "epoch": 0.77, "learning_rate": 4.1206097343555e-07, "logits/chosen": -2.584949254989624, "logits/rejected": -2.6324820518493652, "logps/chosen": -210.24209594726562, "logps/rejected": -212.9149169921875, "loss": 0.5011, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7539044618606567, "rewards/margins": 1.8157832622528076, "rewards/rejected": -2.569687843322754, "step": 3220 }, { "epoch": 0.78, "learning_rate": 4.116152611873774e-07, "logits/chosen": -2.62009596824646, "logits/rejected": -2.600926160812378, "logps/chosen": -257.4697265625, "logps/rejected": -274.40985107421875, "loss": 0.5222, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9684908986091614, "rewards/margins": 1.3399364948272705, "rewards/rejected": -2.308427333831787, "step": 3230 }, { "epoch": 0.78, "learning_rate": 4.1116954893920485e-07, "logits/chosen": -2.8385872840881348, "logits/rejected": -2.6282565593719482, "logps/chosen": -323.53875732421875, "logps/rejected": -255.8197784423828, "loss": 0.6685, "rewards/accuracies": 0.75, "rewards/chosen": -1.5275509357452393, "rewards/margins": 2.112471580505371, "rewards/rejected": -3.6400222778320312, "step": 3240 }, { "epoch": 0.78, "learning_rate": 4.107238366910323e-07, "logits/chosen": -2.627898693084717, "logits/rejected": -2.645310640335083, "logps/chosen": -275.2945251464844, "logps/rejected": -285.93389892578125, "loss": 0.5802, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.2948415279388428, "rewards/margins": 1.8908132314682007, "rewards/rejected": -3.185654878616333, "step": 3250 }, { "epoch": 0.78, "learning_rate": 4.102781244428597e-07, "logits/chosen": -2.7616629600524902, "logits/rejected": -2.687288284301758, "logps/chosen": -284.48553466796875, "logps/rejected": -241.5064239501953, "loss": 0.5391, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.3835375308990479, "rewards/margins": 1.3874008655548096, "rewards/rejected": -2.7709383964538574, "step": 3260 }, { "epoch": 0.79, "learning_rate": 4.098324121946871e-07, "logits/chosen": -2.719691276550293, "logits/rejected": -2.5804100036621094, "logps/chosen": -254.3754119873047, "logps/rejected": -256.03387451171875, "loss": 0.5452, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.2589216232299805, "rewards/margins": 2.365007162094116, "rewards/rejected": -3.6239287853240967, "step": 3270 }, { "epoch": 0.79, "learning_rate": 4.093866999465145e-07, "logits/chosen": -2.795424699783325, "logits/rejected": -2.647678852081299, "logps/chosen": -304.1939392089844, "logps/rejected": -306.00494384765625, "loss": 0.5856, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.8605068922042847, "rewards/margins": 1.4221422672271729, "rewards/rejected": -2.282649040222168, "step": 3280 }, { "epoch": 0.79, "learning_rate": 4.0894098769834197e-07, "logits/chosen": -2.6333696842193604, "logits/rejected": -2.4974284172058105, "logps/chosen": -313.07904052734375, "logps/rejected": -293.76153564453125, "loss": 0.5429, "rewards/accuracies": 0.75, "rewards/chosen": -1.3331620693206787, "rewards/margins": 1.9567950963974, "rewards/rejected": -3.289957046508789, "step": 3290 }, { "epoch": 0.79, "learning_rate": 4.0849527545016937e-07, "logits/chosen": -2.556406021118164, "logits/rejected": -2.5137507915496826, "logps/chosen": -340.04840087890625, "logps/rejected": -318.72686767578125, "loss": 0.4925, "rewards/accuracies": 0.75, "rewards/chosen": -0.1319781392812729, "rewards/margins": 1.5771225690841675, "rewards/rejected": -1.7091007232666016, "step": 3300 }, { "epoch": 0.8, "learning_rate": 4.0804956320199677e-07, "logits/chosen": -2.4623820781707764, "logits/rejected": -2.445798635482788, "logps/chosen": -205.68472290039062, "logps/rejected": -215.0249786376953, "loss": 0.5373, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.573722004890442, "rewards/margins": 1.2487709522247314, "rewards/rejected": -2.822493076324463, "step": 3310 }, { "epoch": 0.8, "learning_rate": 4.0760385095382423e-07, "logits/chosen": -2.6179795265197754, "logits/rejected": -2.5789780616760254, "logps/chosen": -344.1896667480469, "logps/rejected": -260.4026794433594, "loss": 0.5802, "rewards/accuracies": 0.75, "rewards/chosen": -0.7595813870429993, "rewards/margins": 1.3831188678741455, "rewards/rejected": -2.1427001953125, "step": 3320 }, { "epoch": 0.8, "learning_rate": 4.0715813870565163e-07, "logits/chosen": -2.6235435009002686, "logits/rejected": -2.544433355331421, "logps/chosen": -253.1512908935547, "logps/rejected": -364.1452331542969, "loss": 0.4449, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5454899072647095, "rewards/margins": 2.4067556858062744, "rewards/rejected": -3.9522452354431152, "step": 3330 }, { "epoch": 0.8, "learning_rate": 4.0671242645747903e-07, "logits/chosen": -2.55584979057312, "logits/rejected": -2.533679485321045, "logps/chosen": -328.15985107421875, "logps/rejected": -349.55181884765625, "loss": 0.5493, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.9840243458747864, "rewards/margins": 0.47254204750061035, "rewards/rejected": -1.456566572189331, "step": 3340 }, { "epoch": 0.81, "learning_rate": 4.062667142093065e-07, "logits/chosen": -2.5850658416748047, "logits/rejected": -2.600969076156616, "logps/chosen": -333.9216613769531, "logps/rejected": -289.7823486328125, "loss": 0.4948, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.2890478372573853, "rewards/margins": 0.7226174473762512, "rewards/rejected": -2.011665105819702, "step": 3350 }, { "epoch": 0.81, "learning_rate": 4.058210019611339e-07, "logits/chosen": -2.55297589302063, "logits/rejected": -2.4784064292907715, "logps/chosen": -250.0596160888672, "logps/rejected": -254.9423065185547, "loss": 0.5404, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4082493782043457, "rewards/margins": 1.2668209075927734, "rewards/rejected": -2.675070285797119, "step": 3360 }, { "epoch": 0.81, "learning_rate": 4.053752897129613e-07, "logits/chosen": -2.54938006401062, "logits/rejected": -2.424170970916748, "logps/chosen": -283.8787841796875, "logps/rejected": -223.3419952392578, "loss": 0.6448, "rewards/accuracies": 0.75, "rewards/chosen": -0.8317139744758606, "rewards/margins": 2.6544876098632812, "rewards/rejected": -3.486201524734497, "step": 3370 }, { "epoch": 0.81, "learning_rate": 4.0492957746478875e-07, "logits/chosen": -2.602226972579956, "logits/rejected": -2.543545961380005, "logps/chosen": -337.1436462402344, "logps/rejected": -331.75274658203125, "loss": 0.6345, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4520931243896484, "rewards/margins": 1.1032426357269287, "rewards/rejected": -2.555335521697998, "step": 3380 }, { "epoch": 0.82, "learning_rate": 4.0448386521661615e-07, "logits/chosen": -2.709064483642578, "logits/rejected": -2.609285593032837, "logps/chosen": -232.5091552734375, "logps/rejected": -223.9417724609375, "loss": 0.5908, "rewards/accuracies": 0.75, "rewards/chosen": -2.2587926387786865, "rewards/margins": 1.2197411060333252, "rewards/rejected": -3.4785335063934326, "step": 3390 }, { "epoch": 0.82, "learning_rate": 4.0403815296844356e-07, "logits/chosen": -2.738271474838257, "logits/rejected": -2.594999313354492, "logps/chosen": -275.84295654296875, "logps/rejected": -344.08428955078125, "loss": 0.4963, "rewards/accuracies": 0.75, "rewards/chosen": -1.0369099378585815, "rewards/margins": 1.9961076974868774, "rewards/rejected": -3.03301739692688, "step": 3400 }, { "epoch": 0.82, "learning_rate": 4.03592440720271e-07, "logits/chosen": -2.6316440105438232, "logits/rejected": -2.522408962249756, "logps/chosen": -280.1983947753906, "logps/rejected": -274.7087707519531, "loss": 0.4378, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9649044275283813, "rewards/margins": 1.9704071283340454, "rewards/rejected": -3.935311794281006, "step": 3410 }, { "epoch": 0.82, "learning_rate": 4.031467284720984e-07, "logits/chosen": -2.6141161918640137, "logits/rejected": -2.516469955444336, "logps/chosen": -232.9725799560547, "logps/rejected": -208.60269165039062, "loss": 0.5014, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.7316086292266846, "rewards/margins": 1.9986438751220703, "rewards/rejected": -3.730252504348755, "step": 3420 }, { "epoch": 0.83, "learning_rate": 4.027010162239258e-07, "logits/chosen": -2.6620287895202637, "logits/rejected": -2.523186206817627, "logps/chosen": -256.6522521972656, "logps/rejected": -199.36477661132812, "loss": 0.6619, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.9984378814697266, "rewards/margins": 2.2110140323638916, "rewards/rejected": -3.2094521522521973, "step": 3430 }, { "epoch": 0.83, "learning_rate": 4.022553039757532e-07, "logits/chosen": -2.6714510917663574, "logits/rejected": -2.648057460784912, "logps/chosen": -287.1764831542969, "logps/rejected": -313.5572814941406, "loss": 0.6233, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.1280102729797363, "rewards/margins": 0.7432471513748169, "rewards/rejected": -2.871257781982422, "step": 3440 }, { "epoch": 0.83, "learning_rate": 4.018095917275807e-07, "logits/chosen": -2.675344705581665, "logits/rejected": -2.560187578201294, "logps/chosen": -334.3106689453125, "logps/rejected": -262.6590270996094, "loss": 0.5801, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.4317984580993652, "rewards/margins": 0.2597549557685852, "rewards/rejected": -2.6915533542633057, "step": 3450 }, { "epoch": 0.83, "learning_rate": 4.013638794794081e-07, "logits/chosen": -2.728790760040283, "logits/rejected": -2.531425952911377, "logps/chosen": -299.5249938964844, "logps/rejected": -282.14385986328125, "loss": 0.4619, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.9811800122261047, "rewards/margins": 1.9384164810180664, "rewards/rejected": -2.919595956802368, "step": 3460 }, { "epoch": 0.84, "learning_rate": 4.009181672312355e-07, "logits/chosen": -2.5976831912994385, "logits/rejected": -2.5936222076416016, "logps/chosen": -251.3915557861328, "logps/rejected": -240.14254760742188, "loss": 0.5653, "rewards/accuracies": 0.75, "rewards/chosen": -2.274533748626709, "rewards/margins": 0.9345539808273315, "rewards/rejected": -3.20908784866333, "step": 3470 }, { "epoch": 0.84, "learning_rate": 4.0047245498306294e-07, "logits/chosen": -2.681915283203125, "logits/rejected": -2.4873757362365723, "logps/chosen": -258.72454833984375, "logps/rejected": -274.6829833984375, "loss": 0.5518, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.1577260494232178, "rewards/margins": 1.8516298532485962, "rewards/rejected": -3.0093560218811035, "step": 3480 }, { "epoch": 0.84, "learning_rate": 4.0002674273489034e-07, "logits/chosen": -2.3440256118774414, "logits/rejected": -2.4099361896514893, "logps/chosen": -424.033935546875, "logps/rejected": -313.701416015625, "loss": 1.6005, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -11.069270133972168, "rewards/margins": -4.384868621826172, "rewards/rejected": -6.6844024658203125, "step": 3490 }, { "epoch": 0.84, "learning_rate": 3.9958103048671774e-07, "logits/chosen": -2.6543171405792236, "logits/rejected": -2.5328967571258545, "logps/chosen": -211.1274871826172, "logps/rejected": -183.19216918945312, "loss": 0.5616, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -1.5951848030090332, "rewards/margins": 1.0990397930145264, "rewards/rejected": -2.6942248344421387, "step": 3500 }, { "epoch": 0.84, "learning_rate": 3.991353182385452e-07, "logits/chosen": -2.6359877586364746, "logits/rejected": -2.553628444671631, "logps/chosen": -371.024658203125, "logps/rejected": -292.0741271972656, "loss": 0.608, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.197676181793213, "rewards/margins": 1.814406156539917, "rewards/rejected": -4.012082576751709, "step": 3510 }, { "epoch": 0.85, "learning_rate": 3.986896059903726e-07, "logits/chosen": -2.1818909645080566, "logits/rejected": -2.1755576133728027, "logps/chosen": -216.0900421142578, "logps/rejected": -177.17047119140625, "loss": 0.5922, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.4119014739990234, "rewards/margins": 1.8648496866226196, "rewards/rejected": -3.2767510414123535, "step": 3520 }, { "epoch": 0.85, "learning_rate": 3.982438937422e-07, "logits/chosen": -2.3716135025024414, "logits/rejected": -2.330021381378174, "logps/chosen": -228.31802368164062, "logps/rejected": -238.8856658935547, "loss": 0.5279, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9070593118667603, "rewards/margins": 2.2716453075408936, "rewards/rejected": -4.178704738616943, "step": 3530 }, { "epoch": 0.85, "learning_rate": 3.9779818149402746e-07, "logits/chosen": -2.4213452339172363, "logits/rejected": -2.287424325942993, "logps/chosen": -173.0181427001953, "logps/rejected": -138.43270874023438, "loss": 0.617, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.259591579437256, "rewards/margins": 0.8109132647514343, "rewards/rejected": -3.070504903793335, "step": 3540 }, { "epoch": 0.85, "learning_rate": 3.9735246924585486e-07, "logits/chosen": -2.5399069786071777, "logits/rejected": -2.5299155712127686, "logps/chosen": -205.474853515625, "logps/rejected": -172.39971923828125, "loss": 0.5583, "rewards/accuracies": 0.75, "rewards/chosen": -1.7592484951019287, "rewards/margins": 1.6299797296524048, "rewards/rejected": -3.389228105545044, "step": 3550 }, { "epoch": 0.86, "learning_rate": 3.9690675699768227e-07, "logits/chosen": -2.606238842010498, "logits/rejected": -2.4996159076690674, "logps/chosen": -229.6689453125, "logps/rejected": -234.391845703125, "loss": 0.5577, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.3872594833374023, "rewards/margins": 1.8904683589935303, "rewards/rejected": -3.2777278423309326, "step": 3560 }, { "epoch": 0.86, "learning_rate": 3.964610447495097e-07, "logits/chosen": -2.5027756690979004, "logits/rejected": -2.4127001762390137, "logps/chosen": -268.26849365234375, "logps/rejected": -248.8619842529297, "loss": 0.541, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.935086965560913, "rewards/margins": 1.4139950275421143, "rewards/rejected": -4.349081993103027, "step": 3570 }, { "epoch": 0.86, "learning_rate": 3.960153325013371e-07, "logits/chosen": -2.600238800048828, "logits/rejected": -2.705057382583618, "logps/chosen": -332.97906494140625, "logps/rejected": -393.6985168457031, "loss": 0.6294, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9589897394180298, "rewards/margins": 0.9357390403747559, "rewards/rejected": -2.894728899002075, "step": 3580 }, { "epoch": 0.86, "learning_rate": 3.9556962025316453e-07, "logits/chosen": -2.4526476860046387, "logits/rejected": -2.3240785598754883, "logps/chosen": -334.75152587890625, "logps/rejected": -304.6665344238281, "loss": 0.6832, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.4315619468688965, "rewards/margins": 1.0265862941741943, "rewards/rejected": -3.458148241043091, "step": 3590 }, { "epoch": 0.87, "learning_rate": 3.9512390800499193e-07, "logits/chosen": -2.607935667037964, "logits/rejected": -2.495234727859497, "logps/chosen": -257.22442626953125, "logps/rejected": -254.654296875, "loss": 0.674, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.4075379371643066, "rewards/margins": 1.691053032875061, "rewards/rejected": -4.098590850830078, "step": 3600 }, { "epoch": 0.87, "eval_logits/chosen": -2.288510799407959, "eval_logits/rejected": -2.256424903869629, "eval_logps/chosen": -231.8350372314453, "eval_logps/rejected": -236.4845733642578, "eval_loss": 0.5134379267692566, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -2.9907476902008057, "eval_rewards/margins": 1.7046312093734741, "eval_rewards/rejected": -4.695379257202148, "eval_runtime": 133.1197, "eval_samples_per_second": 23.708, "eval_steps_per_second": 0.376, "step": 3600 }, { "epoch": 0.87, "learning_rate": 3.946781957568194e-07, "logits/chosen": -2.609255313873291, "logits/rejected": -2.559530735015869, "logps/chosen": -273.26092529296875, "logps/rejected": -311.75030517578125, "loss": 0.556, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.2174479961395264, "rewards/margins": 0.8377370834350586, "rewards/rejected": -3.055185079574585, "step": 3610 }, { "epoch": 0.87, "learning_rate": 3.942324835086468e-07, "logits/chosen": -2.604952812194824, "logits/rejected": -2.5987837314605713, "logps/chosen": -303.27203369140625, "logps/rejected": -262.020751953125, "loss": 0.552, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.2536367177963257, "rewards/margins": 2.3096842765808105, "rewards/rejected": -3.5633208751678467, "step": 3620 }, { "epoch": 0.87, "learning_rate": 3.937867712604742e-07, "logits/chosen": -2.687490940093994, "logits/rejected": -2.5505900382995605, "logps/chosen": -407.6099548339844, "logps/rejected": -314.99267578125, "loss": 0.5187, "rewards/accuracies": 0.75, "rewards/chosen": -1.3432193994522095, "rewards/margins": 1.757752776145935, "rewards/rejected": -3.1009724140167236, "step": 3630 }, { "epoch": 0.88, "learning_rate": 3.9334105901230165e-07, "logits/chosen": -2.369417667388916, "logits/rejected": -2.2396793365478516, "logps/chosen": -338.2528381347656, "logps/rejected": -341.28594970703125, "loss": 0.4532, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8581418991088867, "rewards/margins": 3.1549291610717773, "rewards/rejected": -5.013071537017822, "step": 3640 }, { "epoch": 0.88, "learning_rate": 3.9289534676412905e-07, "logits/chosen": -2.4682111740112305, "logits/rejected": -2.5353970527648926, "logps/chosen": -228.75643920898438, "logps/rejected": -242.5396270751953, "loss": 0.6887, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.8795896768569946, "rewards/margins": 0.8770751953125, "rewards/rejected": -2.756664752960205, "step": 3650 }, { "epoch": 0.88, "learning_rate": 3.9244963451595645e-07, "logits/chosen": -2.5755834579467773, "logits/rejected": -2.496194362640381, "logps/chosen": -204.44625854492188, "logps/rejected": -202.35910034179688, "loss": 0.5614, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.572894811630249, "rewards/margins": 1.5533229112625122, "rewards/rejected": -3.1262173652648926, "step": 3660 }, { "epoch": 0.88, "learning_rate": 3.920039222677839e-07, "logits/chosen": -2.531261682510376, "logits/rejected": -2.4382405281066895, "logps/chosen": -219.8284912109375, "logps/rejected": -268.8829650878906, "loss": 0.5608, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.3239612579345703, "rewards/margins": 2.532979726791382, "rewards/rejected": -3.8569416999816895, "step": 3670 }, { "epoch": 0.89, "learning_rate": 3.915582100196113e-07, "logits/chosen": -2.659780740737915, "logits/rejected": -2.6414692401885986, "logps/chosen": -187.46194458007812, "logps/rejected": -271.33929443359375, "loss": 0.6254, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.4945189952850342, "rewards/margins": 1.7238363027572632, "rewards/rejected": -3.218355655670166, "step": 3680 }, { "epoch": 0.89, "learning_rate": 3.911124977714387e-07, "logits/chosen": -2.525784730911255, "logits/rejected": -2.605313301086426, "logps/chosen": -138.00347900390625, "logps/rejected": -212.57797241210938, "loss": 0.5476, "rewards/accuracies": 0.75, "rewards/chosen": -2.4867968559265137, "rewards/margins": 1.0520381927490234, "rewards/rejected": -3.538835048675537, "step": 3690 }, { "epoch": 0.89, "learning_rate": 3.9066678552326617e-07, "logits/chosen": -2.747058391571045, "logits/rejected": -2.5562584400177, "logps/chosen": -366.90496826171875, "logps/rejected": -337.36309814453125, "loss": 0.5512, "rewards/accuracies": 0.75, "rewards/chosen": -1.590149164199829, "rewards/margins": 1.419371247291565, "rewards/rejected": -3.0095202922821045, "step": 3700 }, { "epoch": 0.89, "learning_rate": 3.902210732750936e-07, "logits/chosen": -2.576045513153076, "logits/rejected": -2.4777674674987793, "logps/chosen": -254.02273559570312, "logps/rejected": -332.081298828125, "loss": 0.6372, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.1122708320617676, "rewards/margins": 0.6883398294448853, "rewards/rejected": -2.8006105422973633, "step": 3710 }, { "epoch": 0.9, "learning_rate": 3.89775361026921e-07, "logits/chosen": -2.511871576309204, "logits/rejected": -2.6217472553253174, "logps/chosen": -264.2774963378906, "logps/rejected": -251.447509765625, "loss": 0.4727, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.3318699598312378, "rewards/margins": 2.0866668224334717, "rewards/rejected": -3.41853666305542, "step": 3720 }, { "epoch": 0.9, "learning_rate": 3.8932964877874843e-07, "logits/chosen": -2.5318033695220947, "logits/rejected": -2.500244617462158, "logps/chosen": -229.66226196289062, "logps/rejected": -264.37689208984375, "loss": 0.5982, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.124739646911621, "rewards/margins": 0.6877515316009521, "rewards/rejected": -1.8124911785125732, "step": 3730 }, { "epoch": 0.9, "learning_rate": 3.8888393653057584e-07, "logits/chosen": -2.362482786178589, "logits/rejected": -2.423068046569824, "logps/chosen": -234.7594451904297, "logps/rejected": -170.59483337402344, "loss": 0.5885, "rewards/accuracies": 0.75, "rewards/chosen": -1.6956630945205688, "rewards/margins": 1.458892583847046, "rewards/rejected": -3.154555559158325, "step": 3740 }, { "epoch": 0.9, "learning_rate": 3.8843822428240324e-07, "logits/chosen": -2.4557576179504395, "logits/rejected": -2.549530506134033, "logps/chosen": -189.58731079101562, "logps/rejected": -231.9694061279297, "loss": 0.4583, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.744417428970337, "rewards/margins": 0.9202855825424194, "rewards/rejected": -2.664702892303467, "step": 3750 }, { "epoch": 0.9, "learning_rate": 3.8799251203423064e-07, "logits/chosen": -2.7234930992126465, "logits/rejected": -2.657794952392578, "logps/chosen": -336.3365173339844, "logps/rejected": -351.0404052734375, "loss": 0.4876, "rewards/accuracies": 0.75, "rewards/chosen": -1.1833642721176147, "rewards/margins": 1.5095411539077759, "rewards/rejected": -2.6929054260253906, "step": 3760 }, { "epoch": 0.91, "learning_rate": 3.875467997860581e-07, "logits/chosen": -2.586146354675293, "logits/rejected": -2.4543778896331787, "logps/chosen": -218.3040008544922, "logps/rejected": -178.90457153320312, "loss": 0.5329, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.7448152303695679, "rewards/margins": 0.5987850427627563, "rewards/rejected": -2.3436005115509033, "step": 3770 }, { "epoch": 0.91, "learning_rate": 3.871010875378855e-07, "logits/chosen": -2.396613597869873, "logits/rejected": -2.467390298843384, "logps/chosen": -278.26885986328125, "logps/rejected": -285.6539611816406, "loss": 0.5684, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.6755433678627014, "rewards/margins": 2.223599672317505, "rewards/rejected": -2.8991427421569824, "step": 3780 }, { "epoch": 0.91, "learning_rate": 3.866553752897129e-07, "logits/chosen": -2.567450523376465, "logits/rejected": -2.653172492980957, "logps/chosen": -252.27001953125, "logps/rejected": -324.4461669921875, "loss": 0.5105, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.1028369665145874, "rewards/margins": 2.0712685585021973, "rewards/rejected": -3.174105644226074, "step": 3790 }, { "epoch": 0.91, "learning_rate": 3.8620966304154036e-07, "logits/chosen": -2.5739176273345947, "logits/rejected": -2.590895652770996, "logps/chosen": -246.83261108398438, "logps/rejected": -229.0023651123047, "loss": 0.6285, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.3254778385162354, "rewards/margins": 1.1004021167755127, "rewards/rejected": -2.425879955291748, "step": 3800 }, { "epoch": 0.92, "learning_rate": 3.8576395079336776e-07, "logits/chosen": -2.664707660675049, "logits/rejected": -2.6448731422424316, "logps/chosen": -291.40386962890625, "logps/rejected": -375.8872985839844, "loss": 0.5661, "rewards/accuracies": 0.75, "rewards/chosen": -0.9805440902709961, "rewards/margins": 0.7267992496490479, "rewards/rejected": -1.7073434591293335, "step": 3810 }, { "epoch": 0.92, "learning_rate": 3.8531823854519516e-07, "logits/chosen": -2.643444776535034, "logits/rejected": -2.5820393562316895, "logps/chosen": -255.2077178955078, "logps/rejected": -280.51702880859375, "loss": 0.514, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.719731092453003, "rewards/margins": 1.4006415605545044, "rewards/rejected": -3.120372772216797, "step": 3820 }, { "epoch": 0.92, "learning_rate": 3.848725262970226e-07, "logits/chosen": -2.6456382274627686, "logits/rejected": -2.661034345626831, "logps/chosen": -278.55535888671875, "logps/rejected": -323.1336975097656, "loss": 0.6591, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.5481061935424805, "rewards/margins": 1.3556015491485596, "rewards/rejected": -3.903707504272461, "step": 3830 }, { "epoch": 0.92, "learning_rate": 3.8442681404885e-07, "logits/chosen": -2.5775082111358643, "logits/rejected": -2.610999584197998, "logps/chosen": -232.3846893310547, "logps/rejected": -248.490966796875, "loss": 0.4868, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.610137701034546, "rewards/margins": 1.7746471166610718, "rewards/rejected": -3.384784698486328, "step": 3840 }, { "epoch": 0.93, "learning_rate": 3.839811018006774e-07, "logits/chosen": -2.613619565963745, "logits/rejected": -2.5495545864105225, "logps/chosen": -305.49139404296875, "logps/rejected": -323.3760681152344, "loss": 0.6557, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.350762128829956, "rewards/margins": 1.6953766345977783, "rewards/rejected": -4.046138763427734, "step": 3850 }, { "epoch": 0.93, "learning_rate": 3.835353895525049e-07, "logits/chosen": -2.573826313018799, "logits/rejected": -2.4991908073425293, "logps/chosen": -241.28286743164062, "logps/rejected": -255.70767211914062, "loss": 0.558, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.769360899925232, "rewards/margins": 1.9593086242675781, "rewards/rejected": -3.7286696434020996, "step": 3860 }, { "epoch": 0.93, "learning_rate": 3.830896773043323e-07, "logits/chosen": -2.3413383960723877, "logits/rejected": -2.299808979034424, "logps/chosen": -262.90850830078125, "logps/rejected": -291.4503173828125, "loss": 0.5558, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.659152626991272, "rewards/margins": 1.9936367273330688, "rewards/rejected": -3.652789354324341, "step": 3870 }, { "epoch": 0.93, "learning_rate": 3.826439650561597e-07, "logits/chosen": -2.7529985904693604, "logits/rejected": -2.639225482940674, "logps/chosen": -326.57635498046875, "logps/rejected": -326.14080810546875, "loss": 0.6663, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.5671730041503906, "rewards/margins": 0.9525227546691895, "rewards/rejected": -3.519695997238159, "step": 3880 }, { "epoch": 0.94, "learning_rate": 3.8219825280798714e-07, "logits/chosen": -2.4761345386505127, "logits/rejected": -2.435760974884033, "logps/chosen": -262.26092529296875, "logps/rejected": -250.92971801757812, "loss": 0.6382, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.9833894968032837, "rewards/margins": 0.8338977098464966, "rewards/rejected": -2.8172874450683594, "step": 3890 }, { "epoch": 0.94, "learning_rate": 3.8175254055981455e-07, "logits/chosen": -2.522275447845459, "logits/rejected": -2.507660388946533, "logps/chosen": -299.82891845703125, "logps/rejected": -256.46240234375, "loss": 0.4979, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8757234811782837, "rewards/margins": 1.705521821975708, "rewards/rejected": -3.5812454223632812, "step": 3900 }, { "epoch": 0.94, "learning_rate": 3.8130682831164195e-07, "logits/chosen": -2.5906052589416504, "logits/rejected": -2.643200635910034, "logps/chosen": -301.50811767578125, "logps/rejected": -296.525634765625, "loss": 0.4759, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.5264441967010498, "rewards/margins": 1.784152626991272, "rewards/rejected": -3.310596466064453, "step": 3910 }, { "epoch": 0.94, "learning_rate": 3.8086111606346946e-07, "logits/chosen": -2.602121353149414, "logits/rejected": -2.4926421642303467, "logps/chosen": -289.6598205566406, "logps/rejected": -309.3394470214844, "loss": 0.483, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.2499332427978516, "rewards/margins": 2.3378403186798096, "rewards/rejected": -3.5877737998962402, "step": 3920 }, { "epoch": 0.95, "learning_rate": 3.8041540381529686e-07, "logits/chosen": -2.642411708831787, "logits/rejected": -2.5664408206939697, "logps/chosen": -297.56597900390625, "logps/rejected": -281.9163513183594, "loss": 0.5553, "rewards/accuracies": 0.75, "rewards/chosen": -2.1734910011291504, "rewards/margins": 1.599700927734375, "rewards/rejected": -3.7731919288635254, "step": 3930 }, { "epoch": 0.95, "learning_rate": 3.7996969156712426e-07, "logits/chosen": -2.7890563011169434, "logits/rejected": -2.659301280975342, "logps/chosen": -420.64630126953125, "logps/rejected": -322.561279296875, "loss": 0.5267, "rewards/accuracies": 0.75, "rewards/chosen": -1.7246720790863037, "rewards/margins": 2.2789828777313232, "rewards/rejected": -4.003654956817627, "step": 3940 }, { "epoch": 0.95, "learning_rate": 3.7952397931895167e-07, "logits/chosen": -2.694575309753418, "logits/rejected": -2.6454367637634277, "logps/chosen": -307.62640380859375, "logps/rejected": -325.6059265136719, "loss": 0.5594, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.480874538421631, "rewards/margins": 2.4836106300354004, "rewards/rejected": -4.964485168457031, "step": 3950 }, { "epoch": 0.95, "learning_rate": 3.790782670707791e-07, "logits/chosen": -2.720595359802246, "logits/rejected": -2.8017539978027344, "logps/chosen": -223.55422973632812, "logps/rejected": -273.29730224609375, "loss": 0.6146, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.248535633087158, "rewards/margins": 0.7739115953445435, "rewards/rejected": -3.022447109222412, "step": 3960 }, { "epoch": 0.96, "learning_rate": 3.786325548226065e-07, "logits/chosen": -2.695719003677368, "logits/rejected": -2.737778902053833, "logps/chosen": -242.3776092529297, "logps/rejected": -295.9952697753906, "loss": 0.6235, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.326387405395508, "rewards/margins": 0.7281680107116699, "rewards/rejected": -3.0545554161071777, "step": 3970 }, { "epoch": 0.96, "learning_rate": 3.7818684257443393e-07, "logits/chosen": -2.5355782508850098, "logits/rejected": -2.4986259937286377, "logps/chosen": -270.2203674316406, "logps/rejected": -282.6047058105469, "loss": 0.7433, "rewards/accuracies": 0.75, "rewards/chosen": -1.4541804790496826, "rewards/margins": 2.1234707832336426, "rewards/rejected": -3.577651262283325, "step": 3980 }, { "epoch": 0.96, "learning_rate": 3.777411303262614e-07, "logits/chosen": -2.602736711502075, "logits/rejected": -2.6677088737487793, "logps/chosen": -394.6764221191406, "logps/rejected": -372.6412353515625, "loss": 0.5133, "rewards/accuracies": 0.75, "rewards/chosen": -1.5492278337478638, "rewards/margins": 2.038675308227539, "rewards/rejected": -3.587902784347534, "step": 3990 }, { "epoch": 0.96, "learning_rate": 3.772954180780888e-07, "logits/chosen": -2.601288318634033, "logits/rejected": -2.5311670303344727, "logps/chosen": -211.21768188476562, "logps/rejected": -192.4173583984375, "loss": 0.5585, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0379996299743652, "rewards/margins": 1.9101365804672241, "rewards/rejected": -2.948136329650879, "step": 4000 }, { "epoch": 0.96, "eval_logits/chosen": -2.4272851943969727, "eval_logits/rejected": -2.3968420028686523, "eval_logps/chosen": -227.1593780517578, "eval_logps/rejected": -231.3815460205078, "eval_loss": 0.5064984560012817, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -2.523179292678833, "eval_rewards/margins": 1.6618961095809937, "eval_rewards/rejected": -4.185075759887695, "eval_runtime": 133.45, "eval_samples_per_second": 23.649, "eval_steps_per_second": 0.375, "step": 4000 }, { "epoch": 0.97, "learning_rate": 3.768497058299162e-07, "logits/chosen": -2.664705753326416, "logits/rejected": -2.668072462081909, "logps/chosen": -245.41207885742188, "logps/rejected": -283.7060241699219, "loss": 0.4825, "rewards/accuracies": 0.75, "rewards/chosen": -1.7716989517211914, "rewards/margins": 1.5381790399551392, "rewards/rejected": -3.3098785877227783, "step": 4010 }, { "epoch": 0.97, "learning_rate": 3.7640399358174365e-07, "logits/chosen": -2.6795592308044434, "logits/rejected": -2.571094036102295, "logps/chosen": -333.5656433105469, "logps/rejected": -239.02841186523438, "loss": 0.4686, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5428540706634521, "rewards/margins": 1.264843225479126, "rewards/rejected": -2.807697296142578, "step": 4020 }, { "epoch": 0.97, "learning_rate": 3.7595828133357105e-07, "logits/chosen": -2.746485948562622, "logits/rejected": -2.6074657440185547, "logps/chosen": -303.5380554199219, "logps/rejected": -241.12039184570312, "loss": 0.5837, "rewards/accuracies": 0.75, "rewards/chosen": -1.3950583934783936, "rewards/margins": 1.0706193447113037, "rewards/rejected": -2.465677499771118, "step": 4030 }, { "epoch": 0.97, "learning_rate": 3.7551256908539845e-07, "logits/chosen": -2.7529234886169434, "logits/rejected": -2.6367545127868652, "logps/chosen": -209.83529663085938, "logps/rejected": -193.84812927246094, "loss": 0.5237, "rewards/accuracies": 1.0, "rewards/chosen": -1.7427949905395508, "rewards/margins": 2.1338396072387695, "rewards/rejected": -3.8766345977783203, "step": 4040 }, { "epoch": 0.97, "learning_rate": 3.750668568372259e-07, "logits/chosen": -2.520416498184204, "logits/rejected": -2.4481160640716553, "logps/chosen": -293.22698974609375, "logps/rejected": -373.7021789550781, "loss": 0.7061, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.4291718006134033, "rewards/margins": 6.727791786193848, "rewards/rejected": -9.156964302062988, "step": 4050 }, { "epoch": 0.98, "learning_rate": 3.746211445890533e-07, "logits/chosen": -2.741536855697632, "logits/rejected": -2.6621148586273193, "logps/chosen": -249.78280639648438, "logps/rejected": -240.9440155029297, "loss": 0.5511, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0934176445007324, "rewards/margins": 1.720473289489746, "rewards/rejected": -3.8138911724090576, "step": 4060 }, { "epoch": 0.98, "learning_rate": 3.741754323408807e-07, "logits/chosen": -2.7620818614959717, "logits/rejected": -2.5791823863983154, "logps/chosen": -349.82342529296875, "logps/rejected": -297.4184875488281, "loss": 0.6709, "rewards/accuracies": 0.75, "rewards/chosen": -2.5897421836853027, "rewards/margins": 1.1426117420196533, "rewards/rejected": -3.732353925704956, "step": 4070 }, { "epoch": 0.98, "learning_rate": 3.7372972009270817e-07, "logits/chosen": -2.6106772422790527, "logits/rejected": -2.502243995666504, "logps/chosen": -225.0702362060547, "logps/rejected": -279.71307373046875, "loss": 0.6106, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.020258903503418, "rewards/margins": 1.8595082759857178, "rewards/rejected": -3.8797671794891357, "step": 4080 }, { "epoch": 0.98, "learning_rate": 3.7328400784453557e-07, "logits/chosen": -2.7874178886413574, "logits/rejected": -2.8164100646972656, "logps/chosen": -296.76483154296875, "logps/rejected": -267.3506164550781, "loss": 0.5592, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1279549598693848, "rewards/margins": 1.4412357807159424, "rewards/rejected": -3.569190263748169, "step": 4090 }, { "epoch": 0.99, "learning_rate": 3.72838295596363e-07, "logits/chosen": -2.780735731124878, "logits/rejected": -2.60390567779541, "logps/chosen": -322.66192626953125, "logps/rejected": -267.9672546386719, "loss": 0.5768, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.0631757974624634, "rewards/margins": 1.4549946784973145, "rewards/rejected": -2.5181705951690674, "step": 4100 }, { "epoch": 0.99, "learning_rate": 3.723925833481904e-07, "logits/chosen": -2.8341269493103027, "logits/rejected": -2.5473570823669434, "logps/chosen": -411.355712890625, "logps/rejected": -237.3580780029297, "loss": 0.5235, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.3425524234771729, "rewards/margins": 1.116189956665039, "rewards/rejected": -2.458742380142212, "step": 4110 }, { "epoch": 0.99, "learning_rate": 3.7194687110001783e-07, "logits/chosen": -2.276176929473877, "logits/rejected": -2.3545913696289062, "logps/chosen": -225.91464233398438, "logps/rejected": -227.13137817382812, "loss": 0.5475, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6823476552963257, "rewards/margins": 1.8060953617095947, "rewards/rejected": -2.488442897796631, "step": 4120 }, { "epoch": 0.99, "learning_rate": 3.7150115885184524e-07, "logits/chosen": -2.8451974391937256, "logits/rejected": -2.5692057609558105, "logps/chosen": -217.84933471679688, "logps/rejected": -214.6060333251953, "loss": 0.4075, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.3134591579437256, "rewards/margins": 1.6907761096954346, "rewards/rejected": -3.004235029220581, "step": 4130 }, { "epoch": 1.0, "learning_rate": 3.7105544660367264e-07, "logits/chosen": -2.6245837211608887, "logits/rejected": -2.554424285888672, "logps/chosen": -265.3359375, "logps/rejected": -204.1483154296875, "loss": 0.5261, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2920002937316895, "rewards/margins": 1.0050559043884277, "rewards/rejected": -3.297056198120117, "step": 4140 }, { "epoch": 1.0, "learning_rate": 3.706097343555001e-07, "logits/chosen": -2.768834352493286, "logits/rejected": -2.726285696029663, "logps/chosen": -286.7914733886719, "logps/rejected": -294.3249816894531, "loss": 0.5969, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.3674845695495605, "rewards/margins": 0.8646809458732605, "rewards/rejected": -3.232165575027466, "step": 4150 }, { "epoch": 1.0, "learning_rate": 3.701640221073275e-07, "logits/chosen": -2.623534917831421, "logits/rejected": -2.506242275238037, "logps/chosen": -358.9901123046875, "logps/rejected": -276.28985595703125, "loss": 0.3072, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5487232804298401, "rewards/margins": 4.627379417419434, "rewards/rejected": -5.176103115081787, "step": 4160 }, { "epoch": 1.0, "learning_rate": 3.697183098591549e-07, "logits/chosen": -2.725881576538086, "logits/rejected": -2.667299270629883, "logps/chosen": -288.69171142578125, "logps/rejected": -358.1936950683594, "loss": 0.102, "rewards/accuracies": 1.0, "rewards/chosen": 0.29223746061325073, "rewards/margins": 6.076581001281738, "rewards/rejected": -5.784343242645264, "step": 4170 }, { "epoch": 1.01, "learning_rate": 3.6927259761098236e-07, "logits/chosen": -2.484480857849121, "logits/rejected": -2.5674691200256348, "logps/chosen": -254.5686798095703, "logps/rejected": -359.6863708496094, "loss": 0.0855, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.383306086063385, "rewards/margins": 6.87512731552124, "rewards/rejected": -7.2584333419799805, "step": 4180 }, { "epoch": 1.01, "learning_rate": 3.6882688536280976e-07, "logits/chosen": -2.5600600242614746, "logits/rejected": -2.6257424354553223, "logps/chosen": -221.41751098632812, "logps/rejected": -294.37677001953125, "loss": 0.1015, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.1488605737686157, "rewards/margins": 5.566132068634033, "rewards/rejected": -6.714992523193359, "step": 4190 }, { "epoch": 1.01, "learning_rate": 3.6838117311463716e-07, "logits/chosen": -2.4749844074249268, "logits/rejected": -2.4374618530273438, "logps/chosen": -254.75198364257812, "logps/rejected": -318.059814453125, "loss": 0.1062, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2943580746650696, "rewards/margins": 5.920179843902588, "rewards/rejected": -6.214537620544434, "step": 4200 }, { "epoch": 1.01, "learning_rate": 3.679354608664646e-07, "logits/chosen": -2.69596791267395, "logits/rejected": -2.647221803665161, "logps/chosen": -252.98281860351562, "logps/rejected": -320.4571228027344, "loss": 0.0864, "rewards/accuracies": 1.0, "rewards/chosen": 0.039807986468076706, "rewards/margins": 6.2454118728637695, "rewards/rejected": -6.20560359954834, "step": 4210 }, { "epoch": 1.02, "learning_rate": 3.67489748618292e-07, "logits/chosen": -2.592900037765503, "logits/rejected": -2.448998212814331, "logps/chosen": -200.04762268066406, "logps/rejected": -247.912109375, "loss": 0.1034, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.484206199645996, "rewards/margins": 3.945133686065674, "rewards/rejected": -5.429339408874512, "step": 4220 }, { "epoch": 1.02, "learning_rate": 3.670440363701194e-07, "logits/chosen": -2.6554009914398193, "logits/rejected": -2.5679659843444824, "logps/chosen": -279.2522888183594, "logps/rejected": -315.81829833984375, "loss": 0.1252, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.9038124084472656, "rewards/margins": 6.856900691986084, "rewards/rejected": -5.953088760375977, "step": 4230 }, { "epoch": 1.02, "learning_rate": 3.665983241219469e-07, "logits/chosen": -2.5731217861175537, "logits/rejected": -2.467867136001587, "logps/chosen": -237.4766082763672, "logps/rejected": -258.3265075683594, "loss": 0.0956, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.0812922939658165, "rewards/margins": 6.334844589233398, "rewards/rejected": -6.253551483154297, "step": 4240 }, { "epoch": 1.02, "learning_rate": 3.661526118737743e-07, "logits/chosen": -2.4351553916931152, "logits/rejected": -2.418645143508911, "logps/chosen": -370.56768798828125, "logps/rejected": -375.59210205078125, "loss": 0.1015, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2138526737689972, "rewards/margins": 6.076406002044678, "rewards/rejected": -6.290258407592773, "step": 4250 }, { "epoch": 1.03, "learning_rate": 3.657068996256017e-07, "logits/chosen": -2.349137306213379, "logits/rejected": -2.307180881500244, "logps/chosen": -250.9742889404297, "logps/rejected": -345.11639404296875, "loss": 0.1001, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.6029349565505981, "rewards/margins": 7.945761680603027, "rewards/rejected": -7.342826843261719, "step": 4260 }, { "epoch": 1.03, "learning_rate": 3.6526118737742914e-07, "logits/chosen": -2.5820562839508057, "logits/rejected": -2.560638189315796, "logps/chosen": -230.8574676513672, "logps/rejected": -302.25787353515625, "loss": 0.0916, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.4544862508773804, "rewards/margins": 6.655741214752197, "rewards/rejected": -5.201254844665527, "step": 4270 }, { "epoch": 1.03, "learning_rate": 3.6481547512925654e-07, "logits/chosen": -2.2908504009246826, "logits/rejected": -2.3787224292755127, "logps/chosen": -178.44161987304688, "logps/rejected": -253.1040496826172, "loss": 0.097, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.7253895998001099, "rewards/margins": 6.9459428787231445, "rewards/rejected": -6.220553398132324, "step": 4280 }, { "epoch": 1.03, "learning_rate": 3.6436976288108395e-07, "logits/chosen": -2.4098219871520996, "logits/rejected": -2.453495979309082, "logps/chosen": -290.34503173828125, "logps/rejected": -380.69964599609375, "loss": 0.1909, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.05411381646990776, "rewards/margins": 6.767613887786865, "rewards/rejected": -6.713500022888184, "step": 4290 }, { "epoch": 1.03, "learning_rate": 3.6392405063291135e-07, "logits/chosen": -2.693671703338623, "logits/rejected": -2.5928988456726074, "logps/chosen": -302.6961975097656, "logps/rejected": -344.5617370605469, "loss": 0.1111, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.14878758788108826, "rewards/margins": 6.189462184906006, "rewards/rejected": -6.040674209594727, "step": 4300 }, { "epoch": 1.04, "learning_rate": 3.634783383847388e-07, "logits/chosen": -2.6140859127044678, "logits/rejected": -2.3915727138519287, "logps/chosen": -358.9208984375, "logps/rejected": -286.369384765625, "loss": 0.0702, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.029497016221284866, "rewards/margins": 6.153254508972168, "rewards/rejected": -6.123757839202881, "step": 4310 }, { "epoch": 1.04, "learning_rate": 3.630326261365662e-07, "logits/chosen": -2.7043561935424805, "logits/rejected": -2.4338130950927734, "logps/chosen": -222.9595947265625, "logps/rejected": -223.4053192138672, "loss": 0.0501, "rewards/accuracies": 1.0, "rewards/chosen": -0.6035966277122498, "rewards/margins": 5.239047050476074, "rewards/rejected": -5.842643737792969, "step": 4320 }, { "epoch": 1.04, "learning_rate": 3.625869138883936e-07, "logits/chosen": -2.6249303817749023, "logits/rejected": -2.669938087463379, "logps/chosen": -222.0033721923828, "logps/rejected": -270.62115478515625, "loss": 0.1371, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.3372684121131897, "rewards/margins": 7.880601406097412, "rewards/rejected": -7.543332576751709, "step": 4330 }, { "epoch": 1.04, "learning_rate": 3.6214120164022107e-07, "logits/chosen": -2.5248541831970215, "logits/rejected": -2.5408248901367188, "logps/chosen": -186.41961669921875, "logps/rejected": -285.8816223144531, "loss": 0.0782, "rewards/accuracies": 1.0, "rewards/chosen": -0.9711707234382629, "rewards/margins": 5.940455436706543, "rewards/rejected": -6.911625862121582, "step": 4340 }, { "epoch": 1.05, "learning_rate": 3.6169548939204847e-07, "logits/chosen": -2.280282735824585, "logits/rejected": -2.3826003074645996, "logps/chosen": -203.95257568359375, "logps/rejected": -264.19281005859375, "loss": 0.0607, "rewards/accuracies": 1.0, "rewards/chosen": -1.7481842041015625, "rewards/margins": 4.814732551574707, "rewards/rejected": -6.562915802001953, "step": 4350 }, { "epoch": 1.05, "learning_rate": 3.6124977714387587e-07, "logits/chosen": -2.3525540828704834, "logits/rejected": -2.377518892288208, "logps/chosen": -234.76779174804688, "logps/rejected": -278.06097412109375, "loss": 0.2528, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.8500163555145264, "rewards/margins": 4.516103267669678, "rewards/rejected": -7.366120338439941, "step": 4360 }, { "epoch": 1.05, "learning_rate": 3.6080406489570333e-07, "logits/chosen": -2.4346866607666016, "logits/rejected": -2.4900708198547363, "logps/chosen": -243.305908203125, "logps/rejected": -304.49737548828125, "loss": 0.0877, "rewards/accuracies": 1.0, "rewards/chosen": -1.2673728466033936, "rewards/margins": 6.260402202606201, "rewards/rejected": -7.527773857116699, "step": 4370 }, { "epoch": 1.05, "learning_rate": 3.6035835264753073e-07, "logits/chosen": -2.5628085136413574, "logits/rejected": -2.5256426334381104, "logps/chosen": -207.5811004638672, "logps/rejected": -283.73809814453125, "loss": 0.132, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9077810049057007, "rewards/margins": 8.054966926574707, "rewards/rejected": -8.962748527526855, "step": 4380 }, { "epoch": 1.06, "learning_rate": 3.5991264039935813e-07, "logits/chosen": -2.597865581512451, "logits/rejected": -2.5068411827087402, "logps/chosen": -194.13424682617188, "logps/rejected": -194.2491912841797, "loss": 0.0857, "rewards/accuracies": 1.0, "rewards/chosen": -0.4135921597480774, "rewards/margins": 5.746599197387695, "rewards/rejected": -6.160191535949707, "step": 4390 }, { "epoch": 1.06, "learning_rate": 3.594669281511856e-07, "logits/chosen": -2.5832905769348145, "logits/rejected": -2.4489328861236572, "logps/chosen": -266.7347717285156, "logps/rejected": -213.3500213623047, "loss": 0.0829, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.097774863243103, "rewards/margins": 6.298386096954346, "rewards/rejected": -7.396161079406738, "step": 4400 }, { "epoch": 1.06, "eval_logits/chosen": -2.2565360069274902, "eval_logits/rejected": -2.21488356590271, "eval_logps/chosen": -240.26016235351562, "eval_logps/rejected": -250.58619689941406, "eval_loss": 0.530575156211853, "eval_rewards/accuracies": 0.6949999928474426, "eval_rewards/chosen": -3.8332605361938477, "eval_rewards/margins": 2.272279977798462, "eval_rewards/rejected": -6.105540752410889, "eval_runtime": 133.9179, "eval_samples_per_second": 23.567, "eval_steps_per_second": 0.373, "step": 4400 }, { "epoch": 1.06, "learning_rate": 3.59021215903013e-07, "logits/chosen": -2.3539633750915527, "logits/rejected": -2.4350552558898926, "logps/chosen": -210.99447631835938, "logps/rejected": -279.7448425292969, "loss": 0.0853, "rewards/accuracies": 1.0, "rewards/chosen": -1.7387447357177734, "rewards/margins": 6.219385147094727, "rewards/rejected": -7.9581298828125, "step": 4410 }, { "epoch": 1.06, "learning_rate": 3.585755036548404e-07, "logits/chosen": -2.6168372631073, "logits/rejected": -2.454535961151123, "logps/chosen": -310.8194274902344, "logps/rejected": -315.9908752441406, "loss": 0.0783, "rewards/accuracies": 1.0, "rewards/chosen": -1.3085572719573975, "rewards/margins": 7.067923545837402, "rewards/rejected": -8.376481056213379, "step": 4420 }, { "epoch": 1.07, "learning_rate": 3.5812979140666785e-07, "logits/chosen": -2.6452255249023438, "logits/rejected": -2.5484931468963623, "logps/chosen": -262.70172119140625, "logps/rejected": -332.6692810058594, "loss": 0.0696, "rewards/accuracies": 1.0, "rewards/chosen": 0.2865467667579651, "rewards/margins": 8.886618614196777, "rewards/rejected": -8.600071907043457, "step": 4430 }, { "epoch": 1.07, "learning_rate": 3.5768407915849525e-07, "logits/chosen": -2.5716865062713623, "logits/rejected": -2.6295697689056396, "logps/chosen": -219.3745880126953, "logps/rejected": -295.82073974609375, "loss": 0.1842, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.3263219594955444, "rewards/margins": 6.227828025817871, "rewards/rejected": -7.554150581359863, "step": 4440 }, { "epoch": 1.07, "learning_rate": 3.5723836691032266e-07, "logits/chosen": -2.721628189086914, "logits/rejected": -2.573312520980835, "logps/chosen": -282.0672912597656, "logps/rejected": -300.0820617675781, "loss": 0.1691, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.3201095759868622, "rewards/margins": 7.496065616607666, "rewards/rejected": -7.175955295562744, "step": 4450 }, { "epoch": 1.07, "learning_rate": 3.5679265466215006e-07, "logits/chosen": -2.6510682106018066, "logits/rejected": -2.6375367641448975, "logps/chosen": -280.3169250488281, "logps/rejected": -317.9718322753906, "loss": 0.0615, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.035267461091279984, "rewards/margins": 6.4750657081604, "rewards/rejected": -6.510333061218262, "step": 4460 }, { "epoch": 1.08, "learning_rate": 3.563469424139775e-07, "logits/chosen": -2.6726248264312744, "logits/rejected": -2.5161356925964355, "logps/chosen": -266.86773681640625, "logps/rejected": -280.0702209472656, "loss": 0.1067, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.0053305355831980705, "rewards/margins": 7.370538234710693, "rewards/rejected": -7.375868797302246, "step": 4470 }, { "epoch": 1.08, "learning_rate": 3.559012301658049e-07, "logits/chosen": -2.4348297119140625, "logits/rejected": -2.3549609184265137, "logps/chosen": -288.95452880859375, "logps/rejected": -375.9358215332031, "loss": 0.1212, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.1854562759399414, "rewards/margins": 8.024209022521973, "rewards/rejected": -7.838752746582031, "step": 4480 }, { "epoch": 1.08, "learning_rate": 3.554555179176323e-07, "logits/chosen": -2.579810619354248, "logits/rejected": -2.4765095710754395, "logps/chosen": -244.8577117919922, "logps/rejected": -270.9769592285156, "loss": 0.1491, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 2.057324171066284, "rewards/margins": 9.266227722167969, "rewards/rejected": -7.208902835845947, "step": 4490 }, { "epoch": 1.08, "learning_rate": 3.550098056694598e-07, "logits/chosen": -2.6141881942749023, "logits/rejected": -2.6928577423095703, "logps/chosen": -209.54342651367188, "logps/rejected": -355.61163330078125, "loss": 0.0718, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.4131593704223633, "rewards/margins": 8.726862907409668, "rewards/rejected": -8.313703536987305, "step": 4500 }, { "epoch": 1.09, "learning_rate": 3.545640934212872e-07, "logits/chosen": -2.6281442642211914, "logits/rejected": -2.420114517211914, "logps/chosen": -239.50320434570312, "logps/rejected": -216.16220092773438, "loss": 0.0776, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.6639260053634644, "rewards/margins": 7.536489009857178, "rewards/rejected": -6.872563362121582, "step": 4510 }, { "epoch": 1.09, "learning_rate": 3.541183811731146e-07, "logits/chosen": -2.6011791229248047, "logits/rejected": -2.56583571434021, "logps/chosen": -269.5140686035156, "logps/rejected": -278.8650817871094, "loss": 0.0671, "rewards/accuracies": 1.0, "rewards/chosen": 0.6813734769821167, "rewards/margins": 8.699769973754883, "rewards/rejected": -8.018396377563477, "step": 4520 }, { "epoch": 1.09, "learning_rate": 3.5367266892494204e-07, "logits/chosen": -2.620603561401367, "logits/rejected": -2.5572497844696045, "logps/chosen": -291.53448486328125, "logps/rejected": -297.11328125, "loss": 0.0778, "rewards/accuracies": 1.0, "rewards/chosen": -0.2895924150943756, "rewards/margins": 6.378467082977295, "rewards/rejected": -6.668059349060059, "step": 4530 }, { "epoch": 1.09, "learning_rate": 3.5322695667676944e-07, "logits/chosen": -2.5830204486846924, "logits/rejected": -2.5361826419830322, "logps/chosen": -200.8857421875, "logps/rejected": -299.4195251464844, "loss": 0.0902, "rewards/accuracies": 1.0, "rewards/chosen": 0.07186810672283173, "rewards/margins": 7.2964348793029785, "rewards/rejected": -7.224567413330078, "step": 4540 }, { "epoch": 1.1, "learning_rate": 3.5278124442859684e-07, "logits/chosen": -2.5962417125701904, "logits/rejected": -2.556992292404175, "logps/chosen": -241.4461212158203, "logps/rejected": -348.4083557128906, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.9010728597640991, "rewards/margins": 10.432828903198242, "rewards/rejected": -9.531754493713379, "step": 4550 }, { "epoch": 1.1, "learning_rate": 3.523355321804243e-07, "logits/chosen": -2.623161554336548, "logits/rejected": -2.672816276550293, "logps/chosen": -306.97027587890625, "logps/rejected": -397.1775207519531, "loss": 0.1126, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.9301154017448425, "rewards/margins": 8.396772384643555, "rewards/rejected": -7.466658115386963, "step": 4560 }, { "epoch": 1.1, "learning_rate": 3.518898199322517e-07, "logits/chosen": -2.687253475189209, "logits/rejected": -2.619741916656494, "logps/chosen": -347.5944519042969, "logps/rejected": -337.0247497558594, "loss": 0.1122, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.29035288095474243, "rewards/margins": 6.988702297210693, "rewards/rejected": -6.6983489990234375, "step": 4570 }, { "epoch": 1.1, "learning_rate": 3.514441076840791e-07, "logits/chosen": -2.512484312057495, "logits/rejected": -2.4654126167297363, "logps/chosen": -213.0530242919922, "logps/rejected": -314.42327880859375, "loss": 0.0805, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.24507789313793182, "rewards/margins": 7.889502048492432, "rewards/rejected": -8.1345796585083, "step": 4580 }, { "epoch": 1.1, "learning_rate": 3.5099839543590656e-07, "logits/chosen": -2.444556713104248, "logits/rejected": -2.4471569061279297, "logps/chosen": -308.90423583984375, "logps/rejected": -515.9949340820312, "loss": 0.1229, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.2889782190322876, "rewards/margins": 11.80101490020752, "rewards/rejected": -10.51203727722168, "step": 4590 }, { "epoch": 1.11, "learning_rate": 3.50552683187734e-07, "logits/chosen": -2.689913034439087, "logits/rejected": -2.5694491863250732, "logps/chosen": -337.18414306640625, "logps/rejected": -306.79388427734375, "loss": 0.0925, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.7814053297042847, "rewards/margins": 6.829098701477051, "rewards/rejected": -6.047692775726318, "step": 4600 }, { "epoch": 1.11, "learning_rate": 3.501069709395614e-07, "logits/chosen": -2.7009692192077637, "logits/rejected": -2.654804229736328, "logps/chosen": -286.5771484375, "logps/rejected": -297.51361083984375, "loss": 0.0954, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.4205569624900818, "rewards/margins": 7.934011936187744, "rewards/rejected": -7.513455390930176, "step": 4610 }, { "epoch": 1.11, "learning_rate": 3.496612586913889e-07, "logits/chosen": -2.5797932147979736, "logits/rejected": -2.5282037258148193, "logps/chosen": -354.33245849609375, "logps/rejected": -367.6128845214844, "loss": 0.1232, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.8205696940422058, "rewards/margins": 8.912364959716797, "rewards/rejected": -8.091795921325684, "step": 4620 }, { "epoch": 1.11, "learning_rate": 3.492155464432163e-07, "logits/chosen": -2.7749524116516113, "logits/rejected": -2.616973400115967, "logps/chosen": -389.33636474609375, "logps/rejected": -307.29693603515625, "loss": 0.0655, "rewards/accuracies": 1.0, "rewards/chosen": 1.2715777158737183, "rewards/margins": 6.716928005218506, "rewards/rejected": -5.445350170135498, "step": 4630 }, { "epoch": 1.12, "learning_rate": 3.487698341950437e-07, "logits/chosen": -2.6227383613586426, "logits/rejected": -2.583625316619873, "logps/chosen": -219.1520538330078, "logps/rejected": -240.3316650390625, "loss": 0.1236, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.37693899869918823, "rewards/margins": 7.113305568695068, "rewards/rejected": -7.490243434906006, "step": 4640 }, { "epoch": 1.12, "learning_rate": 3.483241219468711e-07, "logits/chosen": -2.567394733428955, "logits/rejected": -2.6633286476135254, "logps/chosen": -237.76931762695312, "logps/rejected": -346.8174743652344, "loss": 0.121, "rewards/accuracies": 1.0, "rewards/chosen": 0.017298942431807518, "rewards/margins": 7.123035430908203, "rewards/rejected": -7.10573673248291, "step": 4650 }, { "epoch": 1.12, "learning_rate": 3.4787840969869854e-07, "logits/chosen": -2.5288941860198975, "logits/rejected": -2.5487313270568848, "logps/chosen": -273.1539306640625, "logps/rejected": -327.77471923828125, "loss": 0.0768, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.34701523184776306, "rewards/margins": 8.51653003692627, "rewards/rejected": -8.169514656066895, "step": 4660 }, { "epoch": 1.12, "learning_rate": 3.4743269745052594e-07, "logits/chosen": -2.729304313659668, "logits/rejected": -2.6582436561584473, "logps/chosen": -188.26229858398438, "logps/rejected": -237.3467254638672, "loss": 0.0703, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.063475251197815, "rewards/margins": 6.28040075302124, "rewards/rejected": -7.343875885009766, "step": 4670 }, { "epoch": 1.13, "learning_rate": 3.4698698520235335e-07, "logits/chosen": -2.615694999694824, "logits/rejected": -2.5419116020202637, "logps/chosen": -209.73489379882812, "logps/rejected": -298.9879150390625, "loss": 0.1043, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.41462770104408264, "rewards/margins": 7.651031494140625, "rewards/rejected": -8.065659523010254, "step": 4680 }, { "epoch": 1.13, "learning_rate": 3.465412729541808e-07, "logits/chosen": -2.561784267425537, "logits/rejected": -2.501864194869995, "logps/chosen": -298.063720703125, "logps/rejected": -343.0885314941406, "loss": 0.0979, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0329442024230957, "rewards/margins": 7.5313615798950195, "rewards/rejected": -8.564305305480957, "step": 4690 }, { "epoch": 1.13, "learning_rate": 3.460955607060082e-07, "logits/chosen": -2.762120485305786, "logits/rejected": -2.6479175090789795, "logps/chosen": -328.4841003417969, "logps/rejected": -332.92572021484375, "loss": 0.14, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.12023515999317169, "rewards/margins": 6.435657501220703, "rewards/rejected": -6.315423011779785, "step": 4700 }, { "epoch": 1.13, "learning_rate": 3.456498484578356e-07, "logits/chosen": -2.7214341163635254, "logits/rejected": -2.6656060218811035, "logps/chosen": -225.5690460205078, "logps/rejected": -299.20703125, "loss": 0.0973, "rewards/accuracies": 1.0, "rewards/chosen": -0.33568239212036133, "rewards/margins": 6.949644565582275, "rewards/rejected": -7.2853264808654785, "step": 4710 }, { "epoch": 1.14, "learning_rate": 3.4520413620966306e-07, "logits/chosen": -2.5441741943359375, "logits/rejected": -2.5853934288024902, "logps/chosen": -199.78700256347656, "logps/rejected": -308.087646484375, "loss": 0.1425, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.6698516607284546, "rewards/margins": 7.509990692138672, "rewards/rejected": -6.8401384353637695, "step": 4720 }, { "epoch": 1.14, "learning_rate": 3.4475842396149047e-07, "logits/chosen": -2.5010409355163574, "logits/rejected": -2.500927448272705, "logps/chosen": -233.6601104736328, "logps/rejected": -353.69781494140625, "loss": 0.0913, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.5150055289268494, "rewards/margins": 8.125643730163574, "rewards/rejected": -8.640649795532227, "step": 4730 }, { "epoch": 1.14, "learning_rate": 3.4431271171331787e-07, "logits/chosen": -2.6142001152038574, "logits/rejected": -2.3814339637756348, "logps/chosen": -292.4364013671875, "logps/rejected": -319.9347229003906, "loss": 0.1175, "rewards/accuracies": 1.0, "rewards/chosen": -0.1018899455666542, "rewards/margins": 8.109670639038086, "rewards/rejected": -8.211560249328613, "step": 4740 }, { "epoch": 1.14, "learning_rate": 3.438669994651453e-07, "logits/chosen": -2.5011606216430664, "logits/rejected": -2.4382424354553223, "logps/chosen": -208.596435546875, "logps/rejected": -326.25323486328125, "loss": 0.0888, "rewards/accuracies": 1.0, "rewards/chosen": 0.11111573874950409, "rewards/margins": 8.364480972290039, "rewards/rejected": -8.253364562988281, "step": 4750 }, { "epoch": 1.15, "learning_rate": 3.4342128721697273e-07, "logits/chosen": -2.535123109817505, "logits/rejected": -2.3774056434631348, "logps/chosen": -261.7142028808594, "logps/rejected": -269.27703857421875, "loss": 0.128, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3187916278839111, "rewards/margins": 5.030508518218994, "rewards/rejected": -6.349300384521484, "step": 4760 }, { "epoch": 1.15, "learning_rate": 3.4297557496880013e-07, "logits/chosen": -2.6771738529205322, "logits/rejected": -2.4687163829803467, "logps/chosen": -309.0341796875, "logps/rejected": -324.45794677734375, "loss": 0.0581, "rewards/accuracies": 1.0, "rewards/chosen": -0.0969277173280716, "rewards/margins": 8.045573234558105, "rewards/rejected": -8.142500877380371, "step": 4770 }, { "epoch": 1.15, "learning_rate": 3.425298627206276e-07, "logits/chosen": -2.4616754055023193, "logits/rejected": -2.5115413665771484, "logps/chosen": -228.31356811523438, "logps/rejected": -312.6092834472656, "loss": 0.1108, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1317485272884369, "rewards/margins": 6.608421325683594, "rewards/rejected": -6.740170478820801, "step": 4780 }, { "epoch": 1.15, "learning_rate": 3.42084150472455e-07, "logits/chosen": -2.6824426651000977, "logits/rejected": -2.4847335815429688, "logps/chosen": -254.8378448486328, "logps/rejected": -316.4959411621094, "loss": 0.1683, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.210846185684204, "rewards/margins": 7.360320091247559, "rewards/rejected": -8.571165084838867, "step": 4790 }, { "epoch": 1.16, "learning_rate": 3.416384382242824e-07, "logits/chosen": -2.5573363304138184, "logits/rejected": -2.6037890911102295, "logps/chosen": -204.7184600830078, "logps/rejected": -264.3968505859375, "loss": 0.1383, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.4384899139404297, "rewards/margins": 5.136371612548828, "rewards/rejected": -5.574862003326416, "step": 4800 }, { "epoch": 1.16, "eval_logits/chosen": -2.3642618656158447, "eval_logits/rejected": -2.3301374912261963, "eval_logps/chosen": -240.0742645263672, "eval_logps/rejected": -246.86351013183594, "eval_loss": 0.543235182762146, "eval_rewards/accuracies": 0.6675000190734863, "eval_rewards/chosen": -3.8146703243255615, "eval_rewards/margins": 1.918602705001831, "eval_rewards/rejected": -5.733273506164551, "eval_runtime": 134.4375, "eval_samples_per_second": 23.476, "eval_steps_per_second": 0.372, "step": 4800 }, { "epoch": 1.16, "learning_rate": 3.411927259761098e-07, "logits/chosen": -2.652888774871826, "logits/rejected": -2.55576229095459, "logps/chosen": -223.55447387695312, "logps/rejected": -250.8131866455078, "loss": 0.1212, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.21370844542980194, "rewards/margins": 6.394108772277832, "rewards/rejected": -6.60781717300415, "step": 4810 }, { "epoch": 1.16, "learning_rate": 3.4074701372793725e-07, "logits/chosen": -2.676522731781006, "logits/rejected": -2.4591970443725586, "logps/chosen": -237.09603881835938, "logps/rejected": -273.28900146484375, "loss": 0.1163, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6377540826797485, "rewards/margins": 6.5416717529296875, "rewards/rejected": -8.179426193237305, "step": 4820 }, { "epoch": 1.16, "learning_rate": 3.4030130147976465e-07, "logits/chosen": -2.5285964012145996, "logits/rejected": -2.544323444366455, "logps/chosen": -322.7830810546875, "logps/rejected": -394.04583740234375, "loss": 0.0904, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2724666893482208, "rewards/margins": 9.214590072631836, "rewards/rejected": -9.48705768585205, "step": 4830 }, { "epoch": 1.16, "learning_rate": 3.3985558923159206e-07, "logits/chosen": -2.651007652282715, "logits/rejected": -2.6063904762268066, "logps/chosen": -284.8469543457031, "logps/rejected": -305.2882385253906, "loss": 0.0995, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8186386823654175, "rewards/margins": 7.470262050628662, "rewards/rejected": -8.288900375366211, "step": 4840 }, { "epoch": 1.17, "learning_rate": 3.394098769834195e-07, "logits/chosen": -2.7330214977264404, "logits/rejected": -2.7236289978027344, "logps/chosen": -313.14666748046875, "logps/rejected": -385.87530517578125, "loss": 0.114, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.0422546863555908, "rewards/margins": 6.971061706542969, "rewards/rejected": -8.01331615447998, "step": 4850 }, { "epoch": 1.17, "learning_rate": 3.389641647352469e-07, "logits/chosen": -2.62068247795105, "logits/rejected": -2.59302020072937, "logps/chosen": -181.2661590576172, "logps/rejected": -293.93359375, "loss": 0.0889, "rewards/accuracies": 1.0, "rewards/chosen": -1.053655982017517, "rewards/margins": 6.890317440032959, "rewards/rejected": -7.943974494934082, "step": 4860 }, { "epoch": 1.17, "learning_rate": 3.385184524870743e-07, "logits/chosen": -2.7119078636169434, "logits/rejected": -2.5695385932922363, "logps/chosen": -351.404052734375, "logps/rejected": -259.8926696777344, "loss": 0.1015, "rewards/accuracies": 1.0, "rewards/chosen": -0.2817792296409607, "rewards/margins": 5.497501850128174, "rewards/rejected": -5.779280662536621, "step": 4870 }, { "epoch": 1.17, "learning_rate": 3.380727402389018e-07, "logits/chosen": -2.640078544616699, "logits/rejected": -2.616539478302002, "logps/chosen": -202.4800567626953, "logps/rejected": -362.8905334472656, "loss": 0.1176, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.41821393370628357, "rewards/margins": 9.335319519042969, "rewards/rejected": -9.753533363342285, "step": 4880 }, { "epoch": 1.18, "learning_rate": 3.376270279907292e-07, "logits/chosen": -2.784754991531372, "logits/rejected": -2.621223211288452, "logps/chosen": -294.47381591796875, "logps/rejected": -250.52694702148438, "loss": 0.1217, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.5304609537124634, "rewards/margins": 6.352158546447754, "rewards/rejected": -5.821697235107422, "step": 4890 }, { "epoch": 1.18, "learning_rate": 3.371813157425566e-07, "logits/chosen": -2.585599899291992, "logits/rejected": -2.4793825149536133, "logps/chosen": -350.9255676269531, "logps/rejected": -284.8587646484375, "loss": 0.1114, "rewards/accuracies": 1.0, "rewards/chosen": -0.4237033724784851, "rewards/margins": 6.879862308502197, "rewards/rejected": -7.303567409515381, "step": 4900 }, { "epoch": 1.18, "learning_rate": 3.3673560349438404e-07, "logits/chosen": -2.536411762237549, "logits/rejected": -2.687934398651123, "logps/chosen": -231.5205535888672, "logps/rejected": -356.8689880371094, "loss": 0.1045, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.6803652048110962, "rewards/margins": 8.608926773071289, "rewards/rejected": -9.289292335510254, "step": 4910 }, { "epoch": 1.18, "learning_rate": 3.3628989124621144e-07, "logits/chosen": -2.7165687084198, "logits/rejected": -2.590156078338623, "logps/chosen": -210.8171844482422, "logps/rejected": -249.25381469726562, "loss": 0.1056, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.7293668985366821, "rewards/margins": 8.638975143432617, "rewards/rejected": -7.909608364105225, "step": 4920 }, { "epoch": 1.19, "learning_rate": 3.3584417899803884e-07, "logits/chosen": -2.709575891494751, "logits/rejected": -2.5665009021759033, "logps/chosen": -278.49725341796875, "logps/rejected": -300.79083251953125, "loss": 0.089, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9219233393669128, "rewards/margins": 6.195326328277588, "rewards/rejected": -7.117249488830566, "step": 4930 }, { "epoch": 1.19, "learning_rate": 3.353984667498663e-07, "logits/chosen": -2.4022912979125977, "logits/rejected": -2.588256359100342, "logps/chosen": -246.93045043945312, "logps/rejected": -302.0685119628906, "loss": 0.1222, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2618193626403809, "rewards/margins": 6.316469669342041, "rewards/rejected": -7.578289985656738, "step": 4940 }, { "epoch": 1.19, "learning_rate": 3.349527545016937e-07, "logits/chosen": -2.6150996685028076, "logits/rejected": -2.436365842819214, "logps/chosen": -362.4632873535156, "logps/rejected": -396.39971923828125, "loss": 0.0859, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.31028467416763306, "rewards/margins": 7.695733070373535, "rewards/rejected": -7.3854475021362305, "step": 4950 }, { "epoch": 1.19, "learning_rate": 3.345070422535211e-07, "logits/chosen": -2.392132520675659, "logits/rejected": -2.520671844482422, "logps/chosen": -163.79122924804688, "logps/rejected": -267.2300720214844, "loss": 0.099, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.057686686515808, "rewards/margins": 7.228782653808594, "rewards/rejected": -8.286470413208008, "step": 4960 }, { "epoch": 1.2, "learning_rate": 3.340613300053485e-07, "logits/chosen": -2.809436559677124, "logits/rejected": -2.632969617843628, "logps/chosen": -301.5816955566406, "logps/rejected": -422.3345642089844, "loss": 0.102, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.04304458945989609, "rewards/margins": 9.245333671569824, "rewards/rejected": -9.202289581298828, "step": 4970 }, { "epoch": 1.2, "learning_rate": 3.3361561775717596e-07, "logits/chosen": -2.3093483448028564, "logits/rejected": -2.373852491378784, "logps/chosen": -197.71905517578125, "logps/rejected": -246.5425567626953, "loss": 0.0629, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.19547121226787567, "rewards/margins": 6.883888244628906, "rewards/rejected": -6.688416957855225, "step": 4980 }, { "epoch": 1.2, "learning_rate": 3.3316990550900336e-07, "logits/chosen": -2.665581464767456, "logits/rejected": -2.588740587234497, "logps/chosen": -267.37750244140625, "logps/rejected": -295.717041015625, "loss": 0.1645, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5497006177902222, "rewards/margins": 7.928070068359375, "rewards/rejected": -8.477770805358887, "step": 4990 }, { "epoch": 1.2, "learning_rate": 3.3272419326083077e-07, "logits/chosen": -2.3988068103790283, "logits/rejected": -2.463813066482544, "logps/chosen": -258.91229248046875, "logps/rejected": -241.25588989257812, "loss": 0.1358, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.1083600521087646, "rewards/margins": 5.589565753936768, "rewards/rejected": -7.697924613952637, "step": 5000 }, { "epoch": 1.21, "learning_rate": 3.322784810126582e-07, "logits/chosen": -2.5505945682525635, "logits/rejected": -2.598329544067383, "logps/chosen": -228.31265258789062, "logps/rejected": -260.6291198730469, "loss": 0.2057, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.1303789615631104, "rewards/margins": 5.149099826812744, "rewards/rejected": -6.279478549957275, "step": 5010 }, { "epoch": 1.21, "learning_rate": 3.318327687644856e-07, "logits/chosen": -2.5362915992736816, "logits/rejected": -2.499586582183838, "logps/chosen": -332.15203857421875, "logps/rejected": -346.24346923828125, "loss": 0.0936, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3879649043083191, "rewards/margins": 7.188324928283691, "rewards/rejected": -7.576289176940918, "step": 5020 }, { "epoch": 1.21, "learning_rate": 3.3138705651631303e-07, "logits/chosen": -2.474151849746704, "logits/rejected": -2.479788303375244, "logps/chosen": -290.87408447265625, "logps/rejected": -277.9864501953125, "loss": 0.1125, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.0720984935760498, "rewards/margins": 5.177456378936768, "rewards/rejected": -6.2495551109313965, "step": 5030 }, { "epoch": 1.21, "learning_rate": 3.309413442681405e-07, "logits/chosen": -2.63665771484375, "logits/rejected": -2.5593645572662354, "logps/chosen": -230.1723175048828, "logps/rejected": -318.81451416015625, "loss": 0.1069, "rewards/accuracies": 1.0, "rewards/chosen": -0.13830193877220154, "rewards/margins": 6.594658851623535, "rewards/rejected": -6.7329607009887695, "step": 5040 }, { "epoch": 1.22, "learning_rate": 3.304956320199679e-07, "logits/chosen": -2.2486722469329834, "logits/rejected": -2.3361761569976807, "logps/chosen": -215.96115112304688, "logps/rejected": -291.5397033691406, "loss": 0.0685, "rewards/accuracies": 1.0, "rewards/chosen": -1.0104453563690186, "rewards/margins": 7.1230669021606445, "rewards/rejected": -8.133512496948242, "step": 5050 }, { "epoch": 1.22, "learning_rate": 3.300499197717953e-07, "logits/chosen": -2.5913820266723633, "logits/rejected": -2.387864589691162, "logps/chosen": -318.3968505859375, "logps/rejected": -324.6764221191406, "loss": 0.0845, "rewards/accuracies": 1.0, "rewards/chosen": 0.11920301616191864, "rewards/margins": 8.436397552490234, "rewards/rejected": -8.317194938659668, "step": 5060 }, { "epoch": 1.22, "learning_rate": 3.2960420752362275e-07, "logits/chosen": -2.5653076171875, "logits/rejected": -2.451930522918701, "logps/chosen": -246.45553588867188, "logps/rejected": -275.3322448730469, "loss": 0.0879, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.5844142436981201, "rewards/margins": 7.900046348571777, "rewards/rejected": -9.484460830688477, "step": 5070 }, { "epoch": 1.22, "learning_rate": 3.2915849527545015e-07, "logits/chosen": -2.5098018646240234, "logits/rejected": -2.5696258544921875, "logps/chosen": -235.1271514892578, "logps/rejected": -378.62884521484375, "loss": 0.0676, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.0039904117584229, "rewards/margins": 8.815313339233398, "rewards/rejected": -9.819302558898926, "step": 5080 }, { "epoch": 1.23, "learning_rate": 3.2871278302727755e-07, "logits/chosen": -2.520859718322754, "logits/rejected": -2.5557596683502197, "logps/chosen": -157.81492614746094, "logps/rejected": -281.9358215332031, "loss": 0.0865, "rewards/accuracies": 1.0, "rewards/chosen": -0.4797874987125397, "rewards/margins": 6.32138729095459, "rewards/rejected": -6.801175117492676, "step": 5090 }, { "epoch": 1.23, "learning_rate": 3.28267070779105e-07, "logits/chosen": -2.573880434036255, "logits/rejected": -2.395332098007202, "logps/chosen": -217.60202026367188, "logps/rejected": -219.1641387939453, "loss": 0.0857, "rewards/accuracies": 1.0, "rewards/chosen": -1.7336370944976807, "rewards/margins": 5.26425838470459, "rewards/rejected": -6.99789571762085, "step": 5100 }, { "epoch": 1.23, "learning_rate": 3.278213585309324e-07, "logits/chosen": -2.617797374725342, "logits/rejected": -2.512047290802002, "logps/chosen": -257.33697509765625, "logps/rejected": -271.3388671875, "loss": 0.0955, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.8858249187469482, "rewards/margins": 4.943978786468506, "rewards/rejected": -7.829803466796875, "step": 5110 }, { "epoch": 1.23, "learning_rate": 3.273756462827598e-07, "logits/chosen": -2.3365323543548584, "logits/rejected": -2.356816053390503, "logps/chosen": -191.58914184570312, "logps/rejected": -319.6527404785156, "loss": 0.0734, "rewards/accuracies": 1.0, "rewards/chosen": -1.186131238937378, "rewards/margins": 8.288546562194824, "rewards/rejected": -9.474678993225098, "step": 5120 }, { "epoch": 1.23, "learning_rate": 3.269299340345872e-07, "logits/chosen": -2.3473777770996094, "logits/rejected": -2.274927854537964, "logps/chosen": -283.12548828125, "logps/rejected": -362.0394287109375, "loss": 0.1288, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 1.0770468711853027, "rewards/margins": 12.561004638671875, "rewards/rejected": -11.48395824432373, "step": 5130 }, { "epoch": 1.24, "learning_rate": 3.2648422178641467e-07, "logits/chosen": -2.4011003971099854, "logits/rejected": -2.3673596382141113, "logps/chosen": -280.14373779296875, "logps/rejected": -360.3028259277344, "loss": 0.1372, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.6586049795150757, "rewards/margins": 8.177619934082031, "rewards/rejected": -8.836225509643555, "step": 5140 }, { "epoch": 1.24, "learning_rate": 3.260385095382421e-07, "logits/chosen": -2.405153751373291, "logits/rejected": -2.5100693702697754, "logps/chosen": -194.7483673095703, "logps/rejected": -254.55648803710938, "loss": 0.1007, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.961032509803772, "rewards/margins": 5.3906989097595215, "rewards/rejected": -6.351731300354004, "step": 5150 }, { "epoch": 1.24, "learning_rate": 3.255927972900695e-07, "logits/chosen": -2.440706729888916, "logits/rejected": -2.38972544670105, "logps/chosen": -340.3149108886719, "logps/rejected": -404.1507263183594, "loss": 0.0643, "rewards/accuracies": 1.0, "rewards/chosen": 0.6216408014297485, "rewards/margins": 9.724592208862305, "rewards/rejected": -9.102952003479004, "step": 5160 }, { "epoch": 1.24, "learning_rate": 3.2514708504189693e-07, "logits/chosen": -2.2282023429870605, "logits/rejected": -2.132371187210083, "logps/chosen": -308.7526550292969, "logps/rejected": -340.6700134277344, "loss": 0.0645, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4051053524017334, "rewards/margins": 6.600663185119629, "rewards/rejected": -8.005767822265625, "step": 5170 }, { "epoch": 1.25, "learning_rate": 3.2470137279372434e-07, "logits/chosen": -2.456524133682251, "logits/rejected": -2.4100513458251953, "logps/chosen": -270.8525085449219, "logps/rejected": -367.73382568359375, "loss": 0.093, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1903812289237976, "rewards/margins": 8.29595947265625, "rewards/rejected": -8.486339569091797, "step": 5180 }, { "epoch": 1.25, "learning_rate": 3.2425566054555174e-07, "logits/chosen": -2.426785707473755, "logits/rejected": -2.4124486446380615, "logps/chosen": -201.92190551757812, "logps/rejected": -254.50369262695312, "loss": 0.0998, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.218264579772949, "rewards/margins": 6.062951564788818, "rewards/rejected": -8.281216621398926, "step": 5190 }, { "epoch": 1.25, "learning_rate": 3.238099482973792e-07, "logits/chosen": -2.6022567749023438, "logits/rejected": -2.4152626991271973, "logps/chosen": -317.2975158691406, "logps/rejected": -331.3099670410156, "loss": 0.1425, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.007159471511841, "rewards/margins": 6.119154930114746, "rewards/rejected": -8.126314163208008, "step": 5200 }, { "epoch": 1.25, "eval_logits/chosen": -2.2021408081054688, "eval_logits/rejected": -2.1704671382904053, "eval_logps/chosen": -249.65997314453125, "eval_logps/rejected": -260.0906066894531, "eval_loss": 0.5238316655158997, "eval_rewards/accuracies": 0.6650000214576721, "eval_rewards/chosen": -4.773243427276611, "eval_rewards/margins": 2.282736301422119, "eval_rewards/rejected": -7.0559797286987305, "eval_runtime": 135.0398, "eval_samples_per_second": 23.371, "eval_steps_per_second": 0.37, "step": 5200 }, { "epoch": 1.25, "learning_rate": 3.233642360492066e-07, "logits/chosen": -2.610273599624634, "logits/rejected": -2.397230863571167, "logps/chosen": -259.42962646484375, "logps/rejected": -370.8842468261719, "loss": 0.0872, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.45601025223731995, "rewards/margins": 8.975427627563477, "rewards/rejected": -9.431438446044922, "step": 5210 }, { "epoch": 1.26, "learning_rate": 3.22918523801034e-07, "logits/chosen": -2.5248847007751465, "logits/rejected": -2.4510183334350586, "logps/chosen": -372.82757568359375, "logps/rejected": -339.1612548828125, "loss": 0.052, "rewards/accuracies": 1.0, "rewards/chosen": 0.026867162436246872, "rewards/margins": 8.06849193572998, "rewards/rejected": -8.041624069213867, "step": 5220 }, { "epoch": 1.26, "learning_rate": 3.2247281155286146e-07, "logits/chosen": -2.375784158706665, "logits/rejected": -2.28358793258667, "logps/chosen": -335.15679931640625, "logps/rejected": -404.1818542480469, "loss": 0.1309, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.0233367681503296, "rewards/margins": 7.956613063812256, "rewards/rejected": -8.979949951171875, "step": 5230 }, { "epoch": 1.26, "learning_rate": 3.2202709930468886e-07, "logits/chosen": -2.3147315979003906, "logits/rejected": -2.241579532623291, "logps/chosen": -314.4580078125, "logps/rejected": -595.355224609375, "loss": 0.0975, "rewards/accuracies": 1.0, "rewards/chosen": 0.8091908693313599, "rewards/margins": 26.27374839782715, "rewards/rejected": -25.46455955505371, "step": 5240 }, { "epoch": 1.26, "learning_rate": 3.2158138705651626e-07, "logits/chosen": -2.4444868564605713, "logits/rejected": -2.4202077388763428, "logps/chosen": -353.08477783203125, "logps/rejected": -466.76885986328125, "loss": 0.073, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3587248921394348, "rewards/margins": 8.451542854309082, "rewards/rejected": -8.810267448425293, "step": 5250 }, { "epoch": 1.27, "learning_rate": 3.211356748083437e-07, "logits/chosen": -2.544177532196045, "logits/rejected": -2.4594388008117676, "logps/chosen": -326.6631774902344, "logps/rejected": -265.6733703613281, "loss": 0.2467, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.5703349113464355, "rewards/margins": 6.191134452819824, "rewards/rejected": -7.76146936416626, "step": 5260 }, { "epoch": 1.27, "learning_rate": 3.206899625601711e-07, "logits/chosen": -2.418236255645752, "logits/rejected": -2.3861804008483887, "logps/chosen": -219.2949676513672, "logps/rejected": -341.7588806152344, "loss": 0.0767, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.4184186458587646, "rewards/margins": 6.886469841003418, "rewards/rejected": -9.304888725280762, "step": 5270 }, { "epoch": 1.27, "learning_rate": 3.202442503119985e-07, "logits/chosen": -2.49849271774292, "logits/rejected": -2.432671070098877, "logps/chosen": -228.3261260986328, "logps/rejected": -281.47235107421875, "loss": 0.0582, "rewards/accuracies": 1.0, "rewards/chosen": -2.3620457649230957, "rewards/margins": 6.679247856140137, "rewards/rejected": -9.041293144226074, "step": 5280 }, { "epoch": 1.27, "learning_rate": 3.1979853806382603e-07, "logits/chosen": -2.4951071739196777, "logits/rejected": -2.4019224643707275, "logps/chosen": -286.7401428222656, "logps/rejected": -457.8524475097656, "loss": 0.0843, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.6505063772201538, "rewards/margins": 8.511384963989258, "rewards/rejected": -10.161893844604492, "step": 5290 }, { "epoch": 1.28, "learning_rate": 3.1935282581565344e-07, "logits/chosen": -2.551490068435669, "logits/rejected": -2.500885486602783, "logps/chosen": -276.5769348144531, "logps/rejected": -277.8192443847656, "loss": 0.0831, "rewards/accuracies": 1.0, "rewards/chosen": -0.6650933027267456, "rewards/margins": 7.8217010498046875, "rewards/rejected": -8.486794471740723, "step": 5300 }, { "epoch": 1.28, "learning_rate": 3.1890711356748084e-07, "logits/chosen": -2.4486889839172363, "logits/rejected": -2.468679428100586, "logps/chosen": -250.3290557861328, "logps/rejected": -292.9578857421875, "loss": 0.062, "rewards/accuracies": 1.0, "rewards/chosen": -0.37615785002708435, "rewards/margins": 8.366262435913086, "rewards/rejected": -8.74242115020752, "step": 5310 }, { "epoch": 1.28, "learning_rate": 3.1846140131930824e-07, "logits/chosen": -2.4542434215545654, "logits/rejected": -2.4101929664611816, "logps/chosen": -284.49713134765625, "logps/rejected": -281.8470458984375, "loss": 0.1189, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8050628900527954, "rewards/margins": 6.364178657531738, "rewards/rejected": -7.169241428375244, "step": 5320 }, { "epoch": 1.28, "learning_rate": 3.180156890711357e-07, "logits/chosen": -2.459995746612549, "logits/rejected": -2.2036283016204834, "logps/chosen": -274.40582275390625, "logps/rejected": -258.71160888671875, "loss": 0.0773, "rewards/accuracies": 1.0, "rewards/chosen": -0.7084132432937622, "rewards/margins": 6.719203948974609, "rewards/rejected": -7.42761754989624, "step": 5330 }, { "epoch": 1.29, "learning_rate": 3.175699768229631e-07, "logits/chosen": -2.396700382232666, "logits/rejected": -2.4109833240509033, "logps/chosen": -257.54498291015625, "logps/rejected": -302.1127624511719, "loss": 0.0777, "rewards/accuracies": 1.0, "rewards/chosen": 0.19791404902935028, "rewards/margins": 7.5934038162231445, "rewards/rejected": -7.395489692687988, "step": 5340 }, { "epoch": 1.29, "learning_rate": 3.171242645747905e-07, "logits/chosen": -2.379744052886963, "logits/rejected": -2.3605384826660156, "logps/chosen": -260.70428466796875, "logps/rejected": -273.8489685058594, "loss": 0.1391, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5692093968391418, "rewards/margins": 8.12734603881836, "rewards/rejected": -8.696555137634277, "step": 5350 }, { "epoch": 1.29, "learning_rate": 3.1667855232661796e-07, "logits/chosen": -2.66178297996521, "logits/rejected": -2.6306025981903076, "logps/chosen": -287.98992919921875, "logps/rejected": -422.4910583496094, "loss": 0.0707, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.1379169523715973, "rewards/margins": 10.600385665893555, "rewards/rejected": -10.462468147277832, "step": 5360 }, { "epoch": 1.29, "learning_rate": 3.1623284007844536e-07, "logits/chosen": -2.724097967147827, "logits/rejected": -2.6357712745666504, "logps/chosen": -248.39602661132812, "logps/rejected": -262.27197265625, "loss": 0.1339, "rewards/accuracies": 1.0, "rewards/chosen": -0.5081327557563782, "rewards/margins": 5.618767261505127, "rewards/rejected": -6.126899719238281, "step": 5370 }, { "epoch": 1.29, "learning_rate": 3.1578712783027276e-07, "logits/chosen": -2.5694947242736816, "logits/rejected": -2.6334919929504395, "logps/chosen": -240.76773071289062, "logps/rejected": -364.18157958984375, "loss": 0.1331, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0891406536102295, "rewards/margins": 8.847578048706055, "rewards/rejected": -9.936718940734863, "step": 5380 }, { "epoch": 1.3, "learning_rate": 3.153414155821002e-07, "logits/chosen": -2.7643802165985107, "logits/rejected": -2.6889286041259766, "logps/chosen": -269.68487548828125, "logps/rejected": -359.32550048828125, "loss": 0.1253, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.20100903511047363, "rewards/margins": 7.6140313148498535, "rewards/rejected": -7.815041542053223, "step": 5390 }, { "epoch": 1.3, "learning_rate": 3.148957033339276e-07, "logits/chosen": -2.576159954071045, "logits/rejected": -2.6012067794799805, "logps/chosen": -280.146240234375, "logps/rejected": -369.99505615234375, "loss": 0.0596, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.37197595834732056, "rewards/margins": 9.538189888000488, "rewards/rejected": -9.910165786743164, "step": 5400 }, { "epoch": 1.3, "learning_rate": 3.14449991085755e-07, "logits/chosen": -2.5083253383636475, "logits/rejected": -2.4267446994781494, "logps/chosen": -232.00759887695312, "logps/rejected": -333.5928955078125, "loss": 0.0747, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.04621438309550285, "rewards/margins": 11.238184928894043, "rewards/rejected": -11.191969871520996, "step": 5410 }, { "epoch": 1.3, "learning_rate": 3.140042788375825e-07, "logits/chosen": -2.523236036300659, "logits/rejected": -2.442023754119873, "logps/chosen": -205.5675048828125, "logps/rejected": -263.4608459472656, "loss": 0.1162, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.9463703632354736, "rewards/margins": 7.0555853843688965, "rewards/rejected": -9.001955032348633, "step": 5420 }, { "epoch": 1.31, "learning_rate": 3.135585665894099e-07, "logits/chosen": -2.658153533935547, "logits/rejected": -2.6786551475524902, "logps/chosen": -285.0563659667969, "logps/rejected": -303.50701904296875, "loss": 0.1408, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0504812002182007, "rewards/margins": 5.920778751373291, "rewards/rejected": -6.971259117126465, "step": 5430 }, { "epoch": 1.31, "learning_rate": 3.131128543412373e-07, "logits/chosen": -2.492877960205078, "logits/rejected": -2.3323912620544434, "logps/chosen": -281.31512451171875, "logps/rejected": -279.0833740234375, "loss": 0.1003, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3006997108459473, "rewards/margins": 7.696803092956543, "rewards/rejected": -8.997502326965332, "step": 5440 }, { "epoch": 1.31, "learning_rate": 3.1266714209306474e-07, "logits/chosen": -2.605281114578247, "logits/rejected": -2.359964609146118, "logps/chosen": -235.1231231689453, "logps/rejected": -295.6809387207031, "loss": 0.0503, "rewards/accuracies": 1.0, "rewards/chosen": -1.4890674352645874, "rewards/margins": 6.981287956237793, "rewards/rejected": -8.470356941223145, "step": 5450 }, { "epoch": 1.31, "learning_rate": 3.1222142984489215e-07, "logits/chosen": -2.5633320808410645, "logits/rejected": -2.5102694034576416, "logps/chosen": -299.23663330078125, "logps/rejected": -306.9552307128906, "loss": 0.118, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.6820122003555298, "rewards/margins": 6.708361625671387, "rewards/rejected": -8.390375137329102, "step": 5460 }, { "epoch": 1.32, "learning_rate": 3.1177571759671955e-07, "logits/chosen": -2.7502570152282715, "logits/rejected": -2.6397671699523926, "logps/chosen": -380.3826904296875, "logps/rejected": -339.49615478515625, "loss": 0.0824, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.43768367171287537, "rewards/margins": 8.252010345458984, "rewards/rejected": -7.814326286315918, "step": 5470 }, { "epoch": 1.32, "learning_rate": 3.1133000534854695e-07, "logits/chosen": -2.530097484588623, "logits/rejected": -2.455191135406494, "logps/chosen": -317.61968994140625, "logps/rejected": -382.91668701171875, "loss": 0.1061, "rewards/accuracies": 1.0, "rewards/chosen": -1.5647618770599365, "rewards/margins": 6.722103118896484, "rewards/rejected": -8.286864280700684, "step": 5480 }, { "epoch": 1.32, "learning_rate": 3.108842931003744e-07, "logits/chosen": -2.5136406421661377, "logits/rejected": -2.657163143157959, "logps/chosen": -261.32818603515625, "logps/rejected": -346.85260009765625, "loss": 0.1256, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.4253344535827637, "rewards/margins": 5.57681941986084, "rewards/rejected": -7.002154350280762, "step": 5490 }, { "epoch": 1.32, "learning_rate": 3.104385808522018e-07, "logits/chosen": -2.4770731925964355, "logits/rejected": -2.3463892936706543, "logps/chosen": -300.9383850097656, "logps/rejected": -277.59539794921875, "loss": 0.0992, "rewards/accuracies": 1.0, "rewards/chosen": -0.7142188549041748, "rewards/margins": 5.560961723327637, "rewards/rejected": -6.275180339813232, "step": 5500 }, { "epoch": 1.33, "learning_rate": 3.099928686040292e-07, "logits/chosen": -2.2352776527404785, "logits/rejected": -2.2539238929748535, "logps/chosen": -149.8365936279297, "logps/rejected": -223.1758575439453, "loss": 0.0864, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1484190821647644, "rewards/margins": 7.326336860656738, "rewards/rejected": -7.474755764007568, "step": 5510 }, { "epoch": 1.33, "learning_rate": 3.0954715635585667e-07, "logits/chosen": -2.6775448322296143, "logits/rejected": -2.567296266555786, "logps/chosen": -289.3621520996094, "logps/rejected": -284.3583984375, "loss": 0.1873, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.6089405417442322, "rewards/margins": 6.219395637512207, "rewards/rejected": -6.828335762023926, "step": 5520 }, { "epoch": 1.33, "learning_rate": 3.0910144410768407e-07, "logits/chosen": -2.5233407020568848, "logits/rejected": -2.577707290649414, "logps/chosen": -276.72845458984375, "logps/rejected": -373.488525390625, "loss": 0.0691, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.833437204360962, "rewards/margins": 6.862840175628662, "rewards/rejected": -9.696276664733887, "step": 5530 }, { "epoch": 1.33, "learning_rate": 3.086557318595115e-07, "logits/chosen": -2.51139235496521, "logits/rejected": -2.4481873512268066, "logps/chosen": -339.71600341796875, "logps/rejected": -294.01806640625, "loss": 0.0977, "rewards/accuracies": 1.0, "rewards/chosen": -1.5098949670791626, "rewards/margins": 6.523590087890625, "rewards/rejected": -8.033485412597656, "step": 5540 }, { "epoch": 1.34, "learning_rate": 3.0821001961133893e-07, "logits/chosen": -2.7013440132141113, "logits/rejected": -2.571755886077881, "logps/chosen": -272.5346984863281, "logps/rejected": -308.40118408203125, "loss": 0.0972, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.3033177852630615, "rewards/margins": 8.641069412231445, "rewards/rejected": -8.337750434875488, "step": 5550 }, { "epoch": 1.34, "learning_rate": 3.0776430736316633e-07, "logits/chosen": -2.5146682262420654, "logits/rejected": -2.4330825805664062, "logps/chosen": -255.439208984375, "logps/rejected": -346.15216064453125, "loss": 0.0697, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.32078343629837036, "rewards/margins": 8.167900085449219, "rewards/rejected": -8.488683700561523, "step": 5560 }, { "epoch": 1.34, "learning_rate": 3.0731859511499374e-07, "logits/chosen": -2.6147828102111816, "logits/rejected": -2.5036494731903076, "logps/chosen": -280.19659423828125, "logps/rejected": -364.2330017089844, "loss": 0.069, "rewards/accuracies": 1.0, "rewards/chosen": -0.1797616183757782, "rewards/margins": 8.10425853729248, "rewards/rejected": -8.28402042388916, "step": 5570 }, { "epoch": 1.34, "learning_rate": 3.068728828668212e-07, "logits/chosen": -2.7004218101501465, "logits/rejected": -2.564936637878418, "logps/chosen": -290.7904968261719, "logps/rejected": -324.06072998046875, "loss": 0.1179, "rewards/accuracies": 1.0, "rewards/chosen": -0.9224249124526978, "rewards/margins": 7.382409572601318, "rewards/rejected": -8.304835319519043, "step": 5580 }, { "epoch": 1.35, "learning_rate": 3.064271706186486e-07, "logits/chosen": -2.7208399772644043, "logits/rejected": -2.697457790374756, "logps/chosen": -325.6331481933594, "logps/rejected": -395.9976806640625, "loss": 0.0723, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5056465268135071, "rewards/margins": 7.022643089294434, "rewards/rejected": -7.528289794921875, "step": 5590 }, { "epoch": 1.35, "learning_rate": 3.05981458370476e-07, "logits/chosen": -2.454925060272217, "logits/rejected": -2.514404296875, "logps/chosen": -292.55413818359375, "logps/rejected": -297.22515869140625, "loss": 0.1053, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5471060276031494, "rewards/margins": 6.992198944091797, "rewards/rejected": -7.539304256439209, "step": 5600 }, { "epoch": 1.35, "eval_logits/chosen": -2.297848701477051, "eval_logits/rejected": -2.2597482204437256, "eval_logps/chosen": -250.8496856689453, "eval_logps/rejected": -264.89166259765625, "eval_loss": 0.529845654964447, "eval_rewards/accuracies": 0.6899999976158142, "eval_rewards/chosen": -4.892212867736816, "eval_rewards/margins": 2.6438732147216797, "eval_rewards/rejected": -7.536085605621338, "eval_runtime": 135.6664, "eval_samples_per_second": 23.263, "eval_steps_per_second": 0.369, "step": 5600 }, { "epoch": 1.35, "learning_rate": 3.0553574612230345e-07, "logits/chosen": -2.473297119140625, "logits/rejected": -2.524893045425415, "logps/chosen": -269.2584533691406, "logps/rejected": -356.3623046875, "loss": 0.148, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2787388563156128, "rewards/margins": 8.458620071411133, "rewards/rejected": -9.737358093261719, "step": 5610 }, { "epoch": 1.35, "learning_rate": 3.0509003387413086e-07, "logits/chosen": -2.58944034576416, "logits/rejected": -2.517916679382324, "logps/chosen": -284.4920654296875, "logps/rejected": -234.60812377929688, "loss": 0.1635, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.48317551612854004, "rewards/margins": 6.530011177062988, "rewards/rejected": -7.013186454772949, "step": 5620 }, { "epoch": 1.35, "learning_rate": 3.0464432162595826e-07, "logits/chosen": -2.6766116619110107, "logits/rejected": -2.5364537239074707, "logps/chosen": -389.23895263671875, "logps/rejected": -419.3583068847656, "loss": 0.2322, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.5472573041915894, "rewards/margins": 9.332606315612793, "rewards/rejected": -8.785348892211914, "step": 5630 }, { "epoch": 1.36, "learning_rate": 3.0419860937778566e-07, "logits/chosen": -2.507610559463501, "logits/rejected": -2.510158061981201, "logps/chosen": -304.1316223144531, "logps/rejected": -439.6920471191406, "loss": 0.0783, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.4623780846595764, "rewards/margins": 11.98354721069336, "rewards/rejected": -11.52116870880127, "step": 5640 }, { "epoch": 1.36, "learning_rate": 3.037528971296131e-07, "logits/chosen": -2.271934986114502, "logits/rejected": -2.4431185722351074, "logps/chosen": -226.60751342773438, "logps/rejected": -319.9784240722656, "loss": 0.0787, "rewards/accuracies": 1.0, "rewards/chosen": -0.8316129446029663, "rewards/margins": 8.930788040161133, "rewards/rejected": -9.762401580810547, "step": 5650 }, { "epoch": 1.36, "learning_rate": 3.033071848814405e-07, "logits/chosen": -2.4854538440704346, "logits/rejected": -2.3633275032043457, "logps/chosen": -248.4595489501953, "logps/rejected": -253.173583984375, "loss": 0.1399, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.258836030960083, "rewards/margins": 6.145152568817139, "rewards/rejected": -7.403987884521484, "step": 5660 }, { "epoch": 1.36, "learning_rate": 3.028614726332679e-07, "logits/chosen": -2.17875599861145, "logits/rejected": -2.291477918624878, "logps/chosen": -224.3546905517578, "logps/rejected": -389.11346435546875, "loss": 0.1036, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.877074122428894, "rewards/margins": 6.948840141296387, "rewards/rejected": -7.8259148597717285, "step": 5670 }, { "epoch": 1.37, "learning_rate": 3.024157603850954e-07, "logits/chosen": -2.3191721439361572, "logits/rejected": -2.257412910461426, "logps/chosen": -317.46038818359375, "logps/rejected": -424.9566345214844, "loss": 0.069, "rewards/accuracies": 1.0, "rewards/chosen": -2.4260387420654297, "rewards/margins": 6.946034908294678, "rewards/rejected": -9.37207317352295, "step": 5680 }, { "epoch": 1.37, "learning_rate": 3.019700481369228e-07, "logits/chosen": -2.339301586151123, "logits/rejected": -2.292539358139038, "logps/chosen": -224.4660186767578, "logps/rejected": -261.5100402832031, "loss": 0.078, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.1883065700531006, "rewards/margins": 7.293276309967041, "rewards/rejected": -8.481582641601562, "step": 5690 }, { "epoch": 1.37, "learning_rate": 3.015243358887502e-07, "logits/chosen": -2.2622950077056885, "logits/rejected": -2.1832115650177, "logps/chosen": -314.8285217285156, "logps/rejected": -275.82293701171875, "loss": 0.1257, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.49560078978538513, "rewards/margins": 8.324846267700195, "rewards/rejected": -8.820446968078613, "step": 5700 }, { "epoch": 1.37, "learning_rate": 3.0107862364057764e-07, "logits/chosen": -2.185823440551758, "logits/rejected": -2.0748419761657715, "logps/chosen": -157.82144165039062, "logps/rejected": -272.6867980957031, "loss": 0.1587, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2449867725372314, "rewards/margins": 8.263470649719238, "rewards/rejected": -9.50845718383789, "step": 5710 }, { "epoch": 1.38, "learning_rate": 3.0063291139240504e-07, "logits/chosen": -2.4911751747131348, "logits/rejected": -2.346667766571045, "logps/chosen": -274.0582275390625, "logps/rejected": -328.19305419921875, "loss": 0.1107, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.037358283996582, "rewards/margins": 7.605017185211182, "rewards/rejected": -9.642374992370605, "step": 5720 }, { "epoch": 1.38, "learning_rate": 3.0018719914423245e-07, "logits/chosen": -2.4801878929138184, "logits/rejected": -2.505631446838379, "logps/chosen": -272.7338562011719, "logps/rejected": -355.825439453125, "loss": 0.0679, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.3978612422943115, "rewards/margins": 8.758639335632324, "rewards/rejected": -11.156499862670898, "step": 5730 }, { "epoch": 1.38, "learning_rate": 2.997414868960599e-07, "logits/chosen": -2.3838882446289062, "logits/rejected": -2.4003381729125977, "logps/chosen": -213.65060424804688, "logps/rejected": -358.7235412597656, "loss": 0.1576, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.312072277069092, "rewards/margins": 7.0126800537109375, "rewards/rejected": -10.324752807617188, "step": 5740 }, { "epoch": 1.38, "learning_rate": 2.992957746478873e-07, "logits/chosen": -2.3882734775543213, "logits/rejected": -2.3687222003936768, "logps/chosen": -181.3330535888672, "logps/rejected": -329.9533386230469, "loss": 0.0921, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.8159842491149902, "rewards/margins": 7.217014312744141, "rewards/rejected": -10.032999038696289, "step": 5750 }, { "epoch": 1.39, "learning_rate": 2.988500623997147e-07, "logits/chosen": -2.4455018043518066, "logits/rejected": -2.3834733963012695, "logps/chosen": -220.39810180664062, "logps/rejected": -276.8576965332031, "loss": 0.1322, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.6589540243148804, "rewards/margins": 6.164792060852051, "rewards/rejected": -7.823746681213379, "step": 5760 }, { "epoch": 1.39, "learning_rate": 2.9840435015154216e-07, "logits/chosen": -2.2847981452941895, "logits/rejected": -2.1710095405578613, "logps/chosen": -263.8554382324219, "logps/rejected": -264.84405517578125, "loss": 0.1504, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1371574401855469, "rewards/margins": 7.276597023010254, "rewards/rejected": -8.4137544631958, "step": 5770 }, { "epoch": 1.39, "learning_rate": 2.9795863790336957e-07, "logits/chosen": -1.97348952293396, "logits/rejected": -2.097926139831543, "logps/chosen": -215.86856079101562, "logps/rejected": -299.7110595703125, "loss": 0.1016, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.6454339027404785, "rewards/margins": 5.7139201164245605, "rewards/rejected": -8.359354019165039, "step": 5780 }, { "epoch": 1.39, "learning_rate": 2.9751292565519697e-07, "logits/chosen": -2.5739948749542236, "logits/rejected": -2.4434008598327637, "logps/chosen": -332.3185119628906, "logps/rejected": -269.8470153808594, "loss": 0.1018, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.0568695068359375, "rewards/margins": 5.8183512687683105, "rewards/rejected": -7.87522029876709, "step": 5790 }, { "epoch": 1.4, "learning_rate": 2.9706721340702437e-07, "logits/chosen": -2.3245058059692383, "logits/rejected": -2.3892173767089844, "logps/chosen": -261.55718994140625, "logps/rejected": -333.1568298339844, "loss": 0.1085, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.437150239944458, "rewards/margins": 8.149467468261719, "rewards/rejected": -9.586616516113281, "step": 5800 }, { "epoch": 1.4, "learning_rate": 2.9662150115885183e-07, "logits/chosen": -2.138545274734497, "logits/rejected": -2.2848308086395264, "logps/chosen": -298.55218505859375, "logps/rejected": -374.3736877441406, "loss": 0.106, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.0430874340236187, "rewards/margins": 9.399953842163086, "rewards/rejected": -9.44304084777832, "step": 5810 }, { "epoch": 1.4, "learning_rate": 2.9617578891067923e-07, "logits/chosen": -2.4602222442626953, "logits/rejected": -2.3467583656311035, "logps/chosen": -264.9916687011719, "logps/rejected": -325.85040283203125, "loss": 0.0602, "rewards/accuracies": 1.0, "rewards/chosen": -1.7568753957748413, "rewards/margins": 7.706831455230713, "rewards/rejected": -9.463706970214844, "step": 5820 }, { "epoch": 1.4, "learning_rate": 2.9573007666250663e-07, "logits/chosen": -2.4260144233703613, "logits/rejected": -2.1966934204101562, "logps/chosen": -301.56878662109375, "logps/rejected": -282.50006103515625, "loss": 0.0655, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.379481554031372, "rewards/margins": 6.654795169830322, "rewards/rejected": -9.034276962280273, "step": 5830 }, { "epoch": 1.41, "learning_rate": 2.952843644143341e-07, "logits/chosen": -2.232840061187744, "logits/rejected": -2.278740406036377, "logps/chosen": -355.1175231933594, "logps/rejected": -432.6170349121094, "loss": 0.1483, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.7908014059066772, "rewards/margins": 5.564481735229492, "rewards/rejected": -7.355282783508301, "step": 5840 }, { "epoch": 1.41, "learning_rate": 2.948386521661615e-07, "logits/chosen": -2.609567403793335, "logits/rejected": -2.523547649383545, "logps/chosen": -227.25277709960938, "logps/rejected": -272.94561767578125, "loss": 0.1382, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.8979425430297852, "rewards/margins": 8.014738082885742, "rewards/rejected": -9.912680625915527, "step": 5850 }, { "epoch": 1.41, "learning_rate": 2.943929399179889e-07, "logits/chosen": -2.6208488941192627, "logits/rejected": -2.610959768295288, "logps/chosen": -242.736083984375, "logps/rejected": -297.24835205078125, "loss": 0.15, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.2648603916168213, "rewards/margins": 6.404683589935303, "rewards/rejected": -8.669544219970703, "step": 5860 }, { "epoch": 1.41, "learning_rate": 2.9394722766981635e-07, "logits/chosen": -2.3997504711151123, "logits/rejected": -2.243252992630005, "logps/chosen": -321.68878173828125, "logps/rejected": -326.05242919921875, "loss": 0.1711, "rewards/accuracies": 1.0, "rewards/chosen": -1.2938514947891235, "rewards/margins": 7.7465667724609375, "rewards/rejected": -9.04041862487793, "step": 5870 }, { "epoch": 1.42, "learning_rate": 2.9350151542164375e-07, "logits/chosen": -2.440444231033325, "logits/rejected": -2.283862590789795, "logps/chosen": -273.25384521484375, "logps/rejected": -276.9793701171875, "loss": 0.0978, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1437404900789261, "rewards/margins": 8.990808486938477, "rewards/rejected": -8.847066879272461, "step": 5880 }, { "epoch": 1.42, "learning_rate": 2.9305580317347116e-07, "logits/chosen": -2.2605152130126953, "logits/rejected": -2.250079393386841, "logps/chosen": -291.37786865234375, "logps/rejected": -414.73602294921875, "loss": 0.1424, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.4673226773738861, "rewards/margins": 10.09349250793457, "rewards/rejected": -10.560815811157227, "step": 5890 }, { "epoch": 1.42, "learning_rate": 2.926100909252986e-07, "logits/chosen": -2.527416229248047, "logits/rejected": -2.365100145339966, "logps/chosen": -224.0863494873047, "logps/rejected": -231.84524536132812, "loss": 0.0941, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3509066104888916, "rewards/margins": 5.722054481506348, "rewards/rejected": -7.07296085357666, "step": 5900 }, { "epoch": 1.42, "learning_rate": 2.92164378677126e-07, "logits/chosen": -2.506539821624756, "logits/rejected": -2.341373920440674, "logps/chosen": -362.89874267578125, "logps/rejected": -298.7274169921875, "loss": 0.0757, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1734457015991211, "rewards/margins": 7.619866371154785, "rewards/rejected": -7.793312072753906, "step": 5910 }, { "epoch": 1.42, "learning_rate": 2.917186664289534e-07, "logits/chosen": -2.5742313861846924, "logits/rejected": -2.57194185256958, "logps/chosen": -299.4224853515625, "logps/rejected": -357.34814453125, "loss": 0.1328, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2363429069519043, "rewards/margins": 8.072032928466797, "rewards/rejected": -8.308377265930176, "step": 5920 }, { "epoch": 1.43, "learning_rate": 2.912729541807809e-07, "logits/chosen": -2.4831509590148926, "logits/rejected": -2.3615055084228516, "logps/chosen": -299.07000732421875, "logps/rejected": -282.1550598144531, "loss": 0.128, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6664316654205322, "rewards/margins": 6.462451934814453, "rewards/rejected": -7.128883361816406, "step": 5930 }, { "epoch": 1.43, "learning_rate": 2.908272419326083e-07, "logits/chosen": -2.446892261505127, "logits/rejected": -2.4011735916137695, "logps/chosen": -290.58453369140625, "logps/rejected": -342.33673095703125, "loss": 0.13, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9740456342697144, "rewards/margins": 6.794226169586182, "rewards/rejected": -7.768272399902344, "step": 5940 }, { "epoch": 1.43, "learning_rate": 2.903815296844357e-07, "logits/chosen": -2.438033103942871, "logits/rejected": -2.404005527496338, "logps/chosen": -215.423095703125, "logps/rejected": -287.54266357421875, "loss": 0.0582, "rewards/accuracies": 1.0, "rewards/chosen": -0.6465083360671997, "rewards/margins": 5.986216068267822, "rewards/rejected": -6.632723808288574, "step": 5950 }, { "epoch": 1.43, "learning_rate": 2.899358174362631e-07, "logits/chosen": -2.4988837242126465, "logits/rejected": -2.3473308086395264, "logps/chosen": -323.3792419433594, "logps/rejected": -318.721923828125, "loss": 0.1268, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.178083896636963, "rewards/margins": 5.7345428466796875, "rewards/rejected": -7.91262674331665, "step": 5960 }, { "epoch": 1.44, "learning_rate": 2.894901051880906e-07, "logits/chosen": -2.4918477535247803, "logits/rejected": -2.487396478652954, "logps/chosen": -294.41302490234375, "logps/rejected": -397.73846435546875, "loss": 0.0924, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.4407653212547302, "rewards/margins": 9.137907981872559, "rewards/rejected": -8.697144508361816, "step": 5970 }, { "epoch": 1.44, "learning_rate": 2.89044392939918e-07, "logits/chosen": -2.275686740875244, "logits/rejected": -2.2461109161376953, "logps/chosen": -275.2303161621094, "logps/rejected": -260.4136657714844, "loss": 0.0834, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20579476654529572, "rewards/margins": 7.3556413650512695, "rewards/rejected": -7.561434745788574, "step": 5980 }, { "epoch": 1.44, "learning_rate": 2.885986806917454e-07, "logits/chosen": -2.445239305496216, "logits/rejected": -2.4250895977020264, "logps/chosen": -253.6104278564453, "logps/rejected": -262.63494873046875, "loss": 0.1374, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.17287278175354, "rewards/margins": 6.609041690826416, "rewards/rejected": -7.781915187835693, "step": 5990 }, { "epoch": 1.44, "learning_rate": 2.8815296844357285e-07, "logits/chosen": -2.5300464630126953, "logits/rejected": -2.318606376647949, "logps/chosen": -294.62908935546875, "logps/rejected": -317.3363952636719, "loss": 0.1301, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2451810836791992, "rewards/margins": 7.596142768859863, "rewards/rejected": -8.841323852539062, "step": 6000 }, { "epoch": 1.44, "eval_logits/chosen": -2.1991782188415527, "eval_logits/rejected": -2.160623073577881, "eval_logps/chosen": -242.2802276611328, "eval_logps/rejected": -255.31179809570312, "eval_loss": 0.5189629197120667, "eval_rewards/accuracies": 0.6850000023841858, "eval_rewards/chosen": -4.035269737243652, "eval_rewards/margins": 2.5428318977355957, "eval_rewards/rejected": -6.578101634979248, "eval_runtime": 133.762, "eval_samples_per_second": 23.594, "eval_steps_per_second": 0.374, "step": 6000 }, { "epoch": 1.45, "learning_rate": 2.8770725619540026e-07, "logits/chosen": -2.5709338188171387, "logits/rejected": -2.4518074989318848, "logps/chosen": -314.88885498046875, "logps/rejected": -301.244140625, "loss": 0.0742, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.20206031203269958, "rewards/margins": 6.6858673095703125, "rewards/rejected": -6.887927055358887, "step": 6010 }, { "epoch": 1.45, "learning_rate": 2.8726154394722766e-07, "logits/chosen": -2.309180498123169, "logits/rejected": -2.3795578479766846, "logps/chosen": -315.67608642578125, "logps/rejected": -392.2315673828125, "loss": 0.0897, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.9258975982666016, "rewards/margins": 8.317319869995117, "rewards/rejected": -10.243217468261719, "step": 6020 }, { "epoch": 1.45, "learning_rate": 2.868158316990551e-07, "logits/chosen": -2.2017533779144287, "logits/rejected": -2.311129093170166, "logps/chosen": -228.1017303466797, "logps/rejected": -276.9014587402344, "loss": 0.0993, "rewards/accuracies": 1.0, "rewards/chosen": -0.5432249903678894, "rewards/margins": 7.823616027832031, "rewards/rejected": -8.366842269897461, "step": 6030 }, { "epoch": 1.45, "learning_rate": 2.863701194508825e-07, "logits/chosen": -2.485600709915161, "logits/rejected": -2.3876876831054688, "logps/chosen": -299.70086669921875, "logps/rejected": -301.390869140625, "loss": 0.1048, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8832274675369263, "rewards/margins": 7.019740104675293, "rewards/rejected": -7.902967929840088, "step": 6040 }, { "epoch": 1.46, "learning_rate": 2.859244072027099e-07, "logits/chosen": -2.3670849800109863, "logits/rejected": -2.3215155601501465, "logps/chosen": -349.9449157714844, "logps/rejected": -337.61346435546875, "loss": 0.0864, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.7926805019378662, "rewards/margins": 7.919983863830566, "rewards/rejected": -9.712663650512695, "step": 6050 }, { "epoch": 1.46, "learning_rate": 2.854786949545374e-07, "logits/chosen": -2.5219531059265137, "logits/rejected": -2.479630947113037, "logps/chosen": -402.0883483886719, "logps/rejected": -387.61468505859375, "loss": 0.0834, "rewards/accuracies": 1.0, "rewards/chosen": -0.2764887511730194, "rewards/margins": 7.328072547912598, "rewards/rejected": -7.604561805725098, "step": 6060 }, { "epoch": 1.46, "learning_rate": 2.850329827063648e-07, "logits/chosen": -2.4510700702667236, "logits/rejected": -2.43742036819458, "logps/chosen": -216.06005859375, "logps/rejected": -308.4542541503906, "loss": 0.0882, "rewards/accuracies": 1.0, "rewards/chosen": -0.940998911857605, "rewards/margins": 10.316885948181152, "rewards/rejected": -11.257884979248047, "step": 6070 }, { "epoch": 1.46, "learning_rate": 2.845872704581922e-07, "logits/chosen": -2.4044597148895264, "logits/rejected": -2.3395800590515137, "logps/chosen": -265.0940856933594, "logps/rejected": -279.3209228515625, "loss": 0.0436, "rewards/accuracies": 1.0, "rewards/chosen": -0.5933882594108582, "rewards/margins": 7.275848388671875, "rewards/rejected": -7.869236946105957, "step": 6080 }, { "epoch": 1.47, "learning_rate": 2.8414155821001964e-07, "logits/chosen": -2.4883437156677246, "logits/rejected": -2.4578349590301514, "logps/chosen": -292.8638916015625, "logps/rejected": -271.67974853515625, "loss": 0.1118, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.9473133087158203, "rewards/margins": 4.8426995277404785, "rewards/rejected": -6.790012359619141, "step": 6090 }, { "epoch": 1.47, "learning_rate": 2.8369584596184704e-07, "logits/chosen": -2.505039691925049, "logits/rejected": -2.3995630741119385, "logps/chosen": -295.21234130859375, "logps/rejected": -328.1549987792969, "loss": 0.1052, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.7760322093963623, "rewards/margins": 12.22137451171875, "rewards/rejected": -10.445342063903809, "step": 6100 }, { "epoch": 1.47, "learning_rate": 2.8325013371367444e-07, "logits/chosen": -2.303755521774292, "logits/rejected": -2.3532848358154297, "logps/chosen": -219.3915557861328, "logps/rejected": -285.55853271484375, "loss": 0.0968, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1725101470947266, "rewards/margins": 7.460213661193848, "rewards/rejected": -8.63272476196289, "step": 6110 }, { "epoch": 1.47, "learning_rate": 2.828044214655019e-07, "logits/chosen": -2.404218912124634, "logits/rejected": -2.408658504486084, "logps/chosen": -233.19583129882812, "logps/rejected": -346.0060729980469, "loss": 0.1007, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.6594064831733704, "rewards/margins": 8.938102722167969, "rewards/rejected": -9.597509384155273, "step": 6120 }, { "epoch": 1.48, "learning_rate": 2.823587092173293e-07, "logits/chosen": -2.45180344581604, "logits/rejected": -2.4573071002960205, "logps/chosen": -240.5504150390625, "logps/rejected": -350.02386474609375, "loss": 0.0596, "rewards/accuracies": 1.0, "rewards/chosen": 0.06072967126965523, "rewards/margins": 8.110355377197266, "rewards/rejected": -8.049626350402832, "step": 6130 }, { "epoch": 1.48, "learning_rate": 2.819129969691567e-07, "logits/chosen": -2.5962586402893066, "logits/rejected": -2.6155471801757812, "logps/chosen": -405.8052673339844, "logps/rejected": -486.88604736328125, "loss": 0.0864, "rewards/accuracies": 1.0, "rewards/chosen": 0.47397932410240173, "rewards/margins": 10.261617660522461, "rewards/rejected": -9.787638664245605, "step": 6140 }, { "epoch": 1.48, "learning_rate": 2.814672847209841e-07, "logits/chosen": -2.578833818435669, "logits/rejected": -2.530801773071289, "logps/chosen": -260.2953186035156, "logps/rejected": -258.282958984375, "loss": 0.0889, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.3781076669692993, "rewards/margins": 5.418423652648926, "rewards/rejected": -6.796531677246094, "step": 6150 }, { "epoch": 1.48, "learning_rate": 2.8102157247281156e-07, "logits/chosen": -2.2971138954162598, "logits/rejected": -2.390742063522339, "logps/chosen": -151.07147216796875, "logps/rejected": -317.8833923339844, "loss": 0.114, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.399823397397995, "rewards/margins": 11.081315994262695, "rewards/rejected": -11.481138229370117, "step": 6160 }, { "epoch": 1.48, "learning_rate": 2.8057586022463897e-07, "logits/chosen": -2.4942171573638916, "logits/rejected": -2.372739553451538, "logps/chosen": -300.8179016113281, "logps/rejected": -292.6261901855469, "loss": 0.0753, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7665277719497681, "rewards/margins": 6.451620578765869, "rewards/rejected": -7.218148231506348, "step": 6170 }, { "epoch": 1.49, "learning_rate": 2.8013014797646637e-07, "logits/chosen": -2.4573559761047363, "logits/rejected": -2.380481719970703, "logps/chosen": -330.7941589355469, "logps/rejected": -366.9463195800781, "loss": 0.1366, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.0739825963973999, "rewards/margins": 9.741181373596191, "rewards/rejected": -9.667200088500977, "step": 6180 }, { "epoch": 1.49, "learning_rate": 2.796844357282938e-07, "logits/chosen": -2.4501938819885254, "logits/rejected": -2.291347026824951, "logps/chosen": -301.8091125488281, "logps/rejected": -354.0787353515625, "loss": 0.0845, "rewards/accuracies": 1.0, "rewards/chosen": 0.13122543692588806, "rewards/margins": 10.396775245666504, "rewards/rejected": -10.265549659729004, "step": 6190 }, { "epoch": 1.49, "learning_rate": 2.7923872348012123e-07, "logits/chosen": -2.2618536949157715, "logits/rejected": -2.407378673553467, "logps/chosen": -209.48526000976562, "logps/rejected": -390.2980651855469, "loss": 0.0843, "rewards/accuracies": 1.0, "rewards/chosen": -1.5589649677276611, "rewards/margins": 8.035758972167969, "rewards/rejected": -9.594724655151367, "step": 6200 }, { "epoch": 1.49, "learning_rate": 2.7879301123194863e-07, "logits/chosen": -2.3829917907714844, "logits/rejected": -2.404918670654297, "logps/chosen": -357.5557556152344, "logps/rejected": -368.6526794433594, "loss": 0.0953, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1500093936920166, "rewards/margins": 6.868212699890137, "rewards/rejected": -8.018221855163574, "step": 6210 }, { "epoch": 1.5, "learning_rate": 2.783472989837761e-07, "logits/chosen": -2.3633790016174316, "logits/rejected": -2.3713860511779785, "logps/chosen": -240.88668823242188, "logps/rejected": -341.467041015625, "loss": 0.1382, "rewards/accuracies": 1.0, "rewards/chosen": -0.3870057165622711, "rewards/margins": 8.991482734680176, "rewards/rejected": -9.378487586975098, "step": 6220 }, { "epoch": 1.5, "learning_rate": 2.779015867356035e-07, "logits/chosen": -2.6395559310913086, "logits/rejected": -2.4677324295043945, "logps/chosen": -356.03375244140625, "logps/rejected": -383.40850830078125, "loss": 0.0956, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.233582615852356, "rewards/margins": 8.020268440246582, "rewards/rejected": -9.253849983215332, "step": 6230 }, { "epoch": 1.5, "learning_rate": 2.774558744874309e-07, "logits/chosen": -2.539665460586548, "logits/rejected": -2.538662910461426, "logps/chosen": -249.73861694335938, "logps/rejected": -321.86932373046875, "loss": 0.0861, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.562501311302185, "rewards/margins": 7.141868591308594, "rewards/rejected": -8.70436954498291, "step": 6240 }, { "epoch": 1.5, "learning_rate": 2.7701016223925835e-07, "logits/chosen": -2.4371912479400635, "logits/rejected": -2.4786174297332764, "logps/chosen": -247.0255126953125, "logps/rejected": -413.2113342285156, "loss": 0.0644, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.31962472200393677, "rewards/margins": 10.500336647033691, "rewards/rejected": -10.18071174621582, "step": 6250 }, { "epoch": 1.51, "learning_rate": 2.7656444999108575e-07, "logits/chosen": -2.4717249870300293, "logits/rejected": -2.44594144821167, "logps/chosen": -271.02960205078125, "logps/rejected": -389.52252197265625, "loss": 0.108, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.7181816697120667, "rewards/margins": 10.415331840515137, "rewards/rejected": -9.697149276733398, "step": 6260 }, { "epoch": 1.51, "learning_rate": 2.7611873774291315e-07, "logits/chosen": -2.3463523387908936, "logits/rejected": -2.2759034633636475, "logps/chosen": -314.4310302734375, "logps/rejected": -297.7897033691406, "loss": 0.0727, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.059358298778533936, "rewards/margins": 10.193583488464355, "rewards/rejected": -10.134224891662598, "step": 6270 }, { "epoch": 1.51, "learning_rate": 2.756730254947406e-07, "logits/chosen": -2.372990608215332, "logits/rejected": -2.3513102531433105, "logps/chosen": -187.26589965820312, "logps/rejected": -298.1798400878906, "loss": 0.0952, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.383110761642456, "rewards/margins": 7.358151435852051, "rewards/rejected": -8.741262435913086, "step": 6280 }, { "epoch": 1.51, "learning_rate": 2.75227313246568e-07, "logits/chosen": -2.606323719024658, "logits/rejected": -2.582251787185669, "logps/chosen": -247.609130859375, "logps/rejected": -230.36495971679688, "loss": 0.1148, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.3474022150039673, "rewards/margins": 5.080073356628418, "rewards/rejected": -6.427475929260254, "step": 6290 }, { "epoch": 1.52, "learning_rate": 2.747816009983954e-07, "logits/chosen": -2.346926212310791, "logits/rejected": -2.3074090480804443, "logps/chosen": -197.5868682861328, "logps/rejected": -285.4720458984375, "loss": 0.1181, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.5006293058395386, "rewards/margins": 5.138708114624023, "rewards/rejected": -6.63933801651001, "step": 6300 }, { "epoch": 1.52, "learning_rate": 2.743358887502228e-07, "logits/chosen": -2.4426982402801514, "logits/rejected": -2.5822272300720215, "logps/chosen": -213.3493194580078, "logps/rejected": -277.4560546875, "loss": 0.1475, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.737618088722229, "rewards/margins": 6.50905704498291, "rewards/rejected": -8.246675491333008, "step": 6310 }, { "epoch": 1.52, "learning_rate": 2.738901765020503e-07, "logits/chosen": -2.6064677238464355, "logits/rejected": -2.637498140335083, "logps/chosen": -262.9438171386719, "logps/rejected": -315.25433349609375, "loss": 0.1514, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.899113655090332, "rewards/margins": 6.5391130447387695, "rewards/rejected": -8.438225746154785, "step": 6320 }, { "epoch": 1.52, "learning_rate": 2.734444642538777e-07, "logits/chosen": -2.5768630504608154, "logits/rejected": -2.456533432006836, "logps/chosen": -272.20147705078125, "logps/rejected": -294.9117736816406, "loss": 0.139, "rewards/accuracies": 1.0, "rewards/chosen": 1.5142834186553955, "rewards/margins": 8.897344589233398, "rewards/rejected": -7.383061408996582, "step": 6330 }, { "epoch": 1.53, "learning_rate": 2.729987520057051e-07, "logits/chosen": -2.643470287322998, "logits/rejected": -2.7008514404296875, "logps/chosen": -274.010498046875, "logps/rejected": -338.3312072753906, "loss": 0.1023, "rewards/accuracies": 1.0, "rewards/chosen": 1.253525733947754, "rewards/margins": 9.714042663574219, "rewards/rejected": -8.460517883300781, "step": 6340 }, { "epoch": 1.53, "learning_rate": 2.7255303975753254e-07, "logits/chosen": -2.512911558151245, "logits/rejected": -2.430452823638916, "logps/chosen": -207.1924285888672, "logps/rejected": -359.22052001953125, "loss": 0.0731, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1545320749282837, "rewards/margins": 8.157306671142578, "rewards/rejected": -9.311838150024414, "step": 6350 }, { "epoch": 1.53, "learning_rate": 2.7210732750935994e-07, "logits/chosen": -2.48407244682312, "logits/rejected": -2.511460781097412, "logps/chosen": -254.64480590820312, "logps/rejected": -264.3612060546875, "loss": 0.1433, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.2126295566558838, "rewards/margins": 5.900941371917725, "rewards/rejected": -7.1135711669921875, "step": 6360 }, { "epoch": 1.53, "learning_rate": 2.7166161526118734e-07, "logits/chosen": -2.504002094268799, "logits/rejected": -2.3776869773864746, "logps/chosen": -369.0848388671875, "logps/rejected": -354.16552734375, "loss": 0.0774, "rewards/accuracies": 1.0, "rewards/chosen": 0.13143929839134216, "rewards/margins": 8.589181900024414, "rewards/rejected": -8.457742691040039, "step": 6370 }, { "epoch": 1.54, "learning_rate": 2.712159030130148e-07, "logits/chosen": -2.540222406387329, "logits/rejected": -2.3949813842773438, "logps/chosen": -284.43585205078125, "logps/rejected": -285.30389404296875, "loss": 0.081, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.8853492736816406, "rewards/margins": 4.8586015701293945, "rewards/rejected": -7.743950843811035, "step": 6380 }, { "epoch": 1.54, "learning_rate": 2.707701907648422e-07, "logits/chosen": -2.4505486488342285, "logits/rejected": -2.3909640312194824, "logps/chosen": -290.6482849121094, "logps/rejected": -317.31573486328125, "loss": 0.1226, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6440604329109192, "rewards/margins": 8.36026382446289, "rewards/rejected": -9.004323959350586, "step": 6390 }, { "epoch": 1.54, "learning_rate": 2.703244785166696e-07, "logits/chosen": -2.58925461769104, "logits/rejected": -2.511303424835205, "logps/chosen": -195.77783203125, "logps/rejected": -212.7223358154297, "loss": 0.0789, "rewards/accuracies": 1.0, "rewards/chosen": -1.6220035552978516, "rewards/margins": 6.110626220703125, "rewards/rejected": -7.732629299163818, "step": 6400 }, { "epoch": 1.54, "eval_logits/chosen": -2.2593064308166504, "eval_logits/rejected": -2.221984624862671, "eval_logps/chosen": -248.052734375, "eval_logps/rejected": -263.1014709472656, "eval_loss": 0.5184081196784973, "eval_rewards/accuracies": 0.6775000095367432, "eval_rewards/chosen": -4.6125168800354, "eval_rewards/margins": 2.744553804397583, "eval_rewards/rejected": -7.357071399688721, "eval_runtime": 131.3004, "eval_samples_per_second": 24.036, "eval_steps_per_second": 0.381, "step": 6400 }, { "epoch": 1.54, "learning_rate": 2.6987876626849706e-07, "logits/chosen": -2.4880471229553223, "logits/rejected": -2.4972970485687256, "logps/chosen": -200.48007202148438, "logps/rejected": -252.0703582763672, "loss": 0.0698, "rewards/accuracies": 1.0, "rewards/chosen": -1.7859618663787842, "rewards/margins": 5.987399578094482, "rewards/rejected": -7.7733612060546875, "step": 6410 }, { "epoch": 1.55, "learning_rate": 2.6943305402032446e-07, "logits/chosen": -2.5299830436706543, "logits/rejected": -2.304743528366089, "logps/chosen": -363.29229736328125, "logps/rejected": -313.2667541503906, "loss": 0.091, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.4549384117126465, "rewards/margins": 10.024370193481445, "rewards/rejected": -9.569430351257324, "step": 6420 }, { "epoch": 1.55, "learning_rate": 2.6898734177215186e-07, "logits/chosen": -2.4846057891845703, "logits/rejected": -2.4044010639190674, "logps/chosen": -219.111083984375, "logps/rejected": -255.8389892578125, "loss": 0.1175, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.520255208015442, "rewards/margins": 5.579611778259277, "rewards/rejected": -7.099867343902588, "step": 6430 }, { "epoch": 1.55, "learning_rate": 2.685416295239793e-07, "logits/chosen": -2.5400466918945312, "logits/rejected": -2.5766820907592773, "logps/chosen": -295.1035461425781, "logps/rejected": -357.53485107421875, "loss": 0.0802, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18065910041332245, "rewards/margins": 8.971368789672852, "rewards/rejected": -9.152027130126953, "step": 6440 }, { "epoch": 1.55, "learning_rate": 2.680959172758067e-07, "logits/chosen": -2.555649518966675, "logits/rejected": -2.529740810394287, "logps/chosen": -233.1663818359375, "logps/rejected": -264.34197998046875, "loss": 0.1138, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.1419332027435303, "rewards/margins": 5.442976951599121, "rewards/rejected": -6.584909915924072, "step": 6450 }, { "epoch": 1.55, "learning_rate": 2.676502050276341e-07, "logits/chosen": -2.569977283477783, "logits/rejected": -2.450141429901123, "logps/chosen": -269.0093994140625, "logps/rejected": -326.5610046386719, "loss": 0.0904, "rewards/accuracies": 1.0, "rewards/chosen": -1.8578832149505615, "rewards/margins": 6.224945545196533, "rewards/rejected": -8.0828275680542, "step": 6460 }, { "epoch": 1.56, "learning_rate": 2.6720449277946153e-07, "logits/chosen": -2.3317441940307617, "logits/rejected": -2.5061981678009033, "logps/chosen": -315.4051513671875, "logps/rejected": -441.2271423339844, "loss": 0.091, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.8155472874641418, "rewards/margins": 10.180726051330566, "rewards/rejected": -9.365178108215332, "step": 6470 }, { "epoch": 1.56, "learning_rate": 2.66758780531289e-07, "logits/chosen": -2.6900036334991455, "logits/rejected": -2.5762057304382324, "logps/chosen": -207.0538330078125, "logps/rejected": -264.35064697265625, "loss": 0.0906, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.15104790031909943, "rewards/margins": 7.0734100341796875, "rewards/rejected": -7.224459171295166, "step": 6480 }, { "epoch": 1.56, "learning_rate": 2.663130682831164e-07, "logits/chosen": -2.6154706478118896, "logits/rejected": -2.4836790561676025, "logps/chosen": -272.6229553222656, "logps/rejected": -256.09967041015625, "loss": 0.1349, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.21876728534698486, "rewards/margins": 7.674716949462891, "rewards/rejected": -7.455949306488037, "step": 6490 }, { "epoch": 1.56, "learning_rate": 2.658673560349438e-07, "logits/chosen": -2.4315361976623535, "logits/rejected": -2.423959493637085, "logps/chosen": -261.1012268066406, "logps/rejected": -308.75390625, "loss": 0.1466, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.2184690237045288, "rewards/margins": 6.828681945800781, "rewards/rejected": -8.047151565551758, "step": 6500 }, { "epoch": 1.57, "learning_rate": 2.6542164378677125e-07, "logits/chosen": -2.3758773803710938, "logits/rejected": -2.428539276123047, "logps/chosen": -224.01278686523438, "logps/rejected": -287.84381103515625, "loss": 0.2044, "rewards/accuracies": 1.0, "rewards/chosen": -2.1266942024230957, "rewards/margins": 5.721441268920898, "rewards/rejected": -7.848135471343994, "step": 6510 }, { "epoch": 1.57, "learning_rate": 2.6497593153859865e-07, "logits/chosen": -2.7203621864318848, "logits/rejected": -2.6722311973571777, "logps/chosen": -295.2500915527344, "logps/rejected": -343.1181640625, "loss": 0.0506, "rewards/accuracies": 1.0, "rewards/chosen": -0.597442626953125, "rewards/margins": 7.230844020843506, "rewards/rejected": -7.828286647796631, "step": 6520 }, { "epoch": 1.57, "learning_rate": 2.6453021929042605e-07, "logits/chosen": -2.5043766498565674, "logits/rejected": -2.419970750808716, "logps/chosen": -243.61221313476562, "logps/rejected": -362.5774230957031, "loss": 0.0978, "rewards/accuracies": 1.0, "rewards/chosen": -0.27581435441970825, "rewards/margins": 7.525368690490723, "rewards/rejected": -7.801183223724365, "step": 6530 }, { "epoch": 1.57, "learning_rate": 2.640845070422535e-07, "logits/chosen": -2.360407590866089, "logits/rejected": -2.2817015647888184, "logps/chosen": -203.2313995361328, "logps/rejected": -193.2275848388672, "loss": 0.092, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4021323919296265, "rewards/margins": 5.137397289276123, "rewards/rejected": -6.539530277252197, "step": 6540 }, { "epoch": 1.58, "learning_rate": 2.636387947940809e-07, "logits/chosen": -2.5603091716766357, "logits/rejected": -2.5812692642211914, "logps/chosen": -252.48324584960938, "logps/rejected": -280.01495361328125, "loss": 0.1424, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.5431323051452637, "rewards/margins": 4.878392696380615, "rewards/rejected": -7.421525001525879, "step": 6550 }, { "epoch": 1.58, "learning_rate": 2.631930825459083e-07, "logits/chosen": -2.4909424781799316, "logits/rejected": -2.47674822807312, "logps/chosen": -203.90139770507812, "logps/rejected": -323.13922119140625, "loss": 0.0653, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.3505728244781494, "rewards/margins": 6.907691955566406, "rewards/rejected": -9.258264541625977, "step": 6560 }, { "epoch": 1.58, "learning_rate": 2.6274737029773577e-07, "logits/chosen": -2.509265184402466, "logits/rejected": -2.5538954734802246, "logps/chosen": -204.9351348876953, "logps/rejected": -340.92047119140625, "loss": 0.0754, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5897654294967651, "rewards/margins": 7.562819480895996, "rewards/rejected": -8.152585983276367, "step": 6570 }, { "epoch": 1.58, "learning_rate": 2.6230165804956317e-07, "logits/chosen": -2.493110179901123, "logits/rejected": -2.3449318408966064, "logps/chosen": -273.14569091796875, "logps/rejected": -325.2339782714844, "loss": 0.0913, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.365891456604004, "rewards/margins": 6.012226581573486, "rewards/rejected": -7.37811803817749, "step": 6580 }, { "epoch": 1.59, "learning_rate": 2.618559458013906e-07, "logits/chosen": -2.7275311946868896, "logits/rejected": -2.59036922454834, "logps/chosen": -288.2721862792969, "logps/rejected": -318.25701904296875, "loss": 0.1091, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.019632434472441673, "rewards/margins": 8.365331649780273, "rewards/rejected": -8.345699310302734, "step": 6590 }, { "epoch": 1.59, "learning_rate": 2.6141023355321803e-07, "logits/chosen": -2.4824635982513428, "logits/rejected": -2.421909809112549, "logps/chosen": -401.34173583984375, "logps/rejected": -400.68011474609375, "loss": 0.1259, "rewards/accuracies": 1.0, "rewards/chosen": -1.043386459350586, "rewards/margins": 8.686726570129395, "rewards/rejected": -9.730113983154297, "step": 6600 }, { "epoch": 1.59, "learning_rate": 2.6096452130504543e-07, "logits/chosen": -2.619050979614258, "logits/rejected": -2.5701723098754883, "logps/chosen": -281.85980224609375, "logps/rejected": -437.05523681640625, "loss": 0.0678, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3362850546836853, "rewards/margins": 9.662748336791992, "rewards/rejected": -9.999032974243164, "step": 6610 }, { "epoch": 1.59, "learning_rate": 2.6051880905687284e-07, "logits/chosen": -2.6397461891174316, "logits/rejected": -2.595975637435913, "logps/chosen": -291.05242919921875, "logps/rejected": -292.87384033203125, "loss": 0.1049, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.23073410987854004, "rewards/margins": 6.005520820617676, "rewards/rejected": -6.236255168914795, "step": 6620 }, { "epoch": 1.6, "learning_rate": 2.6007309680870024e-07, "logits/chosen": -2.5201048851013184, "logits/rejected": -2.3694489002227783, "logps/chosen": -304.94183349609375, "logps/rejected": -335.90234375, "loss": 0.093, "rewards/accuracies": 1.0, "rewards/chosen": -1.141577959060669, "rewards/margins": 7.916557312011719, "rewards/rejected": -9.058136940002441, "step": 6630 }, { "epoch": 1.6, "learning_rate": 2.596273845605277e-07, "logits/chosen": -2.6051645278930664, "logits/rejected": -2.445042133331299, "logps/chosen": -376.5633239746094, "logps/rejected": -316.4339294433594, "loss": 0.1029, "rewards/accuracies": 1.0, "rewards/chosen": 0.18511851131916046, "rewards/margins": 7.910219669342041, "rewards/rejected": -7.725100040435791, "step": 6640 }, { "epoch": 1.6, "learning_rate": 2.591816723123551e-07, "logits/chosen": -2.521838426589966, "logits/rejected": -2.405672311782837, "logps/chosen": -268.4342956542969, "logps/rejected": -374.9623718261719, "loss": 0.0886, "rewards/accuracies": 1.0, "rewards/chosen": 0.1511344462633133, "rewards/margins": 11.82387924194336, "rewards/rejected": -11.672745704650879, "step": 6650 }, { "epoch": 1.6, "learning_rate": 2.5873596006418255e-07, "logits/chosen": -2.351754665374756, "logits/rejected": -2.4746947288513184, "logps/chosen": -174.59805297851562, "logps/rejected": -222.7696075439453, "loss": 0.102, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.877276062965393, "rewards/margins": 4.7677321434021, "rewards/rejected": -6.645008087158203, "step": 6660 }, { "epoch": 1.61, "learning_rate": 2.5829024781601e-07, "logits/chosen": -2.329702138900757, "logits/rejected": -2.3888792991638184, "logps/chosen": -282.3338928222656, "logps/rejected": -427.6305236816406, "loss": 0.1305, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5918933153152466, "rewards/margins": 9.590441703796387, "rewards/rejected": -10.18233585357666, "step": 6670 }, { "epoch": 1.61, "learning_rate": 2.578445355678374e-07, "logits/chosen": -2.5636491775512695, "logits/rejected": -2.4580605030059814, "logps/chosen": -180.61399841308594, "logps/rejected": -246.103759765625, "loss": 0.1198, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.5207059383392334, "rewards/margins": 5.905128479003906, "rewards/rejected": -8.425833702087402, "step": 6680 }, { "epoch": 1.61, "learning_rate": 2.573988233196648e-07, "logits/chosen": -2.4034101963043213, "logits/rejected": -2.450488567352295, "logps/chosen": -212.5645294189453, "logps/rejected": -364.51800537109375, "loss": 0.0828, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6967671513557434, "rewards/margins": 9.882649421691895, "rewards/rejected": -10.579416275024414, "step": 6690 }, { "epoch": 1.61, "learning_rate": 2.5695311107149227e-07, "logits/chosen": -2.623979091644287, "logits/rejected": -2.510005474090576, "logps/chosen": -323.07916259765625, "logps/rejected": -392.9112548828125, "loss": 0.0996, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.5257644653320312, "rewards/margins": 10.530187606811523, "rewards/rejected": -12.055953025817871, "step": 6700 }, { "epoch": 1.61, "learning_rate": 2.565073988233197e-07, "logits/chosen": -2.5732877254486084, "logits/rejected": -2.4544434547424316, "logps/chosen": -191.06942749023438, "logps/rejected": -275.33612060546875, "loss": 0.1, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.2350542545318604, "rewards/margins": 6.511356353759766, "rewards/rejected": -8.74640941619873, "step": 6710 }, { "epoch": 1.62, "learning_rate": 2.560616865751471e-07, "logits/chosen": -2.662848949432373, "logits/rejected": -2.6330981254577637, "logps/chosen": -295.5281677246094, "logps/rejected": -355.705810546875, "loss": 0.071, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.05338757112622261, "rewards/margins": 9.251906394958496, "rewards/rejected": -9.30529499053955, "step": 6720 }, { "epoch": 1.62, "learning_rate": 2.5561597432697453e-07, "logits/chosen": -2.643730640411377, "logits/rejected": -2.5296835899353027, "logps/chosen": -277.45465087890625, "logps/rejected": -385.3217468261719, "loss": 0.0975, "rewards/accuracies": 1.0, "rewards/chosen": -0.6557458639144897, "rewards/margins": 9.16593074798584, "rewards/rejected": -9.821678161621094, "step": 6730 }, { "epoch": 1.62, "learning_rate": 2.5517026207880194e-07, "logits/chosen": -2.7320146560668945, "logits/rejected": -2.4216275215148926, "logps/chosen": -343.2032165527344, "logps/rejected": -280.29327392578125, "loss": 0.1192, "rewards/accuracies": 1.0, "rewards/chosen": -0.42550569772720337, "rewards/margins": 7.7524094581604, "rewards/rejected": -8.177915573120117, "step": 6740 }, { "epoch": 1.62, "learning_rate": 2.5472454983062934e-07, "logits/chosen": -2.399095058441162, "logits/rejected": -2.4269192218780518, "logps/chosen": -265.23016357421875, "logps/rejected": -318.9640197753906, "loss": 0.1568, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.0254114866256714, "rewards/margins": 6.011340141296387, "rewards/rejected": -7.036751747131348, "step": 6750 }, { "epoch": 1.63, "learning_rate": 2.542788375824568e-07, "logits/chosen": -2.751743793487549, "logits/rejected": -2.635657787322998, "logps/chosen": -289.47552490234375, "logps/rejected": -322.9488830566406, "loss": 0.1744, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7109371423721313, "rewards/margins": 5.925104141235352, "rewards/rejected": -6.636041164398193, "step": 6760 }, { "epoch": 1.63, "learning_rate": 2.538331253342842e-07, "logits/chosen": -2.623124599456787, "logits/rejected": -2.4960763454437256, "logps/chosen": -261.3349609375, "logps/rejected": -296.0354309082031, "loss": 0.0865, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.05223352834582329, "rewards/margins": 8.582305908203125, "rewards/rejected": -8.634539604187012, "step": 6770 }, { "epoch": 1.63, "learning_rate": 2.533874130861116e-07, "logits/chosen": -2.603426218032837, "logits/rejected": -2.652134418487549, "logps/chosen": -262.34393310546875, "logps/rejected": -321.20977783203125, "loss": 0.1384, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.04393220692873001, "rewards/margins": 6.213263034820557, "rewards/rejected": -6.257195949554443, "step": 6780 }, { "epoch": 1.63, "learning_rate": 2.5294170083793906e-07, "logits/chosen": -2.830530881881714, "logits/rejected": -2.8101234436035156, "logps/chosen": -294.06719970703125, "logps/rejected": -318.9193420410156, "loss": 0.0913, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.9768505096435547, "rewards/margins": 6.7217116355896, "rewards/rejected": -8.698562622070312, "step": 6790 }, { "epoch": 1.64, "learning_rate": 2.5249598858976646e-07, "logits/chosen": -2.626084804534912, "logits/rejected": -2.5628437995910645, "logps/chosen": -315.3648986816406, "logps/rejected": -332.21868896484375, "loss": 0.1274, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.9488713145256042, "rewards/margins": 9.667269706726074, "rewards/rejected": -8.718399047851562, "step": 6800 }, { "epoch": 1.64, "eval_logits/chosen": -2.365267753601074, "eval_logits/rejected": -2.323819160461426, "eval_logps/chosen": -241.0087127685547, "eval_logps/rejected": -254.7549285888672, "eval_loss": 0.5138404369354248, "eval_rewards/accuracies": 0.6650000214576721, "eval_rewards/chosen": -3.9081151485443115, "eval_rewards/margins": 2.614298105239868, "eval_rewards/rejected": -6.522413730621338, "eval_runtime": 132.324, "eval_samples_per_second": 23.851, "eval_steps_per_second": 0.378, "step": 6800 }, { "epoch": 1.64, "learning_rate": 2.5205027634159386e-07, "logits/chosen": -2.77650785446167, "logits/rejected": -2.554332733154297, "logps/chosen": -264.7838134765625, "logps/rejected": -263.158447265625, "loss": 0.1154, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.0337071418762207, "rewards/margins": 5.510695457458496, "rewards/rejected": -7.544403076171875, "step": 6810 }, { "epoch": 1.64, "learning_rate": 2.5160456409342126e-07, "logits/chosen": -2.7782604694366455, "logits/rejected": -2.7263779640197754, "logps/chosen": -331.07598876953125, "logps/rejected": -320.32696533203125, "loss": 0.094, "rewards/accuracies": 1.0, "rewards/chosen": 0.01526255626231432, "rewards/margins": 7.255690097808838, "rewards/rejected": -7.240427494049072, "step": 6820 }, { "epoch": 1.64, "learning_rate": 2.511588518452487e-07, "logits/chosen": -2.7435340881347656, "logits/rejected": -2.6303529739379883, "logps/chosen": -292.32000732421875, "logps/rejected": -340.9613037109375, "loss": 0.0871, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.869441032409668, "rewards/margins": 6.238544464111328, "rewards/rejected": -8.10798454284668, "step": 6830 }, { "epoch": 1.65, "learning_rate": 2.507131395970761e-07, "logits/chosen": -2.6717050075531006, "logits/rejected": -2.720207691192627, "logps/chosen": -284.22979736328125, "logps/rejected": -394.85430908203125, "loss": 0.0877, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0687626600265503, "rewards/margins": 8.739702224731445, "rewards/rejected": -9.808465957641602, "step": 6840 }, { "epoch": 1.65, "learning_rate": 2.502674273489035e-07, "logits/chosen": -2.703340530395508, "logits/rejected": -2.5412425994873047, "logps/chosen": -339.227294921875, "logps/rejected": -345.09332275390625, "loss": 0.0969, "rewards/accuracies": 1.0, "rewards/chosen": -0.5817199945449829, "rewards/margins": 7.840517520904541, "rewards/rejected": -8.422237396240234, "step": 6850 }, { "epoch": 1.65, "learning_rate": 2.49821715100731e-07, "logits/chosen": -2.7909388542175293, "logits/rejected": -2.589139699935913, "logps/chosen": -278.817626953125, "logps/rejected": -346.43548583984375, "loss": 0.0485, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.4508662819862366, "rewards/margins": 9.999730110168457, "rewards/rejected": -9.548861503601074, "step": 6860 }, { "epoch": 1.65, "learning_rate": 2.493760028525584e-07, "logits/chosen": -2.43666672706604, "logits/rejected": -2.3847169876098633, "logps/chosen": -212.90188598632812, "logps/rejected": -268.1318359375, "loss": 0.0691, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2572885751724243, "rewards/margins": 6.4423394203186035, "rewards/rejected": -7.6996283531188965, "step": 6870 }, { "epoch": 1.66, "learning_rate": 2.489302906043858e-07, "logits/chosen": -2.7076172828674316, "logits/rejected": -2.6247425079345703, "logps/chosen": -284.4060974121094, "logps/rejected": -417.0311584472656, "loss": 0.1088, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.40026673674583435, "rewards/margins": 9.445573806762695, "rewards/rejected": -9.845841407775879, "step": 6880 }, { "epoch": 1.66, "learning_rate": 2.4848457835621324e-07, "logits/chosen": -2.670478343963623, "logits/rejected": -2.541105031967163, "logps/chosen": -234.4295654296875, "logps/rejected": -348.7154846191406, "loss": 0.1173, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9956146478652954, "rewards/margins": 9.453681945800781, "rewards/rejected": -10.449296951293945, "step": 6890 }, { "epoch": 1.66, "learning_rate": 2.4803886610804065e-07, "logits/chosen": -2.572587251663208, "logits/rejected": -2.5934700965881348, "logps/chosen": -194.14425659179688, "logps/rejected": -309.1454772949219, "loss": 0.1215, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1737267971038818, "rewards/margins": 7.2784600257873535, "rewards/rejected": -8.45218563079834, "step": 6900 }, { "epoch": 1.66, "learning_rate": 2.4759315385986805e-07, "logits/chosen": -2.53065824508667, "logits/rejected": -2.599759578704834, "logps/chosen": -223.47616577148438, "logps/rejected": -344.7876892089844, "loss": 0.1513, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4943052530288696, "rewards/margins": 5.8011555671691895, "rewards/rejected": -7.2954607009887695, "step": 6910 }, { "epoch": 1.67, "learning_rate": 2.471474416116955e-07, "logits/chosen": -2.3176236152648926, "logits/rejected": -2.2783634662628174, "logps/chosen": -253.1682891845703, "logps/rejected": -328.687744140625, "loss": 0.1323, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.5988147258758545, "rewards/margins": 9.450356483459473, "rewards/rejected": -11.04917049407959, "step": 6920 }, { "epoch": 1.67, "learning_rate": 2.467017293635229e-07, "logits/chosen": -2.734055995941162, "logits/rejected": -2.7162024974823, "logps/chosen": -247.93777465820312, "logps/rejected": -356.2265930175781, "loss": 0.0788, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5648818016052246, "rewards/margins": 6.425793647766113, "rewards/rejected": -6.9906744956970215, "step": 6930 }, { "epoch": 1.67, "learning_rate": 2.462560171153503e-07, "logits/chosen": -2.5415902137756348, "logits/rejected": -2.540515422821045, "logps/chosen": -297.2126770019531, "logps/rejected": -345.50750732421875, "loss": 0.133, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.012619865126907825, "rewards/margins": 8.577271461486816, "rewards/rejected": -8.58989143371582, "step": 6940 }, { "epoch": 1.67, "learning_rate": 2.4581030486717777e-07, "logits/chosen": -2.3323845863342285, "logits/rejected": -2.2749600410461426, "logps/chosen": -324.28778076171875, "logps/rejected": -375.01202392578125, "loss": 0.0685, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.43227776885032654, "rewards/margins": 8.051684379577637, "rewards/rejected": -7.619407653808594, "step": 6950 }, { "epoch": 1.68, "learning_rate": 2.4536459261900517e-07, "logits/chosen": -2.6356239318847656, "logits/rejected": -2.7263784408569336, "logps/chosen": -235.08642578125, "logps/rejected": -319.6094665527344, "loss": 0.0968, "rewards/accuracies": 1.0, "rewards/chosen": -0.5624276399612427, "rewards/margins": 7.660147666931152, "rewards/rejected": -8.222575187683105, "step": 6960 }, { "epoch": 1.68, "learning_rate": 2.4491888037083257e-07, "logits/chosen": -2.4503002166748047, "logits/rejected": -2.4879584312438965, "logps/chosen": -333.0428771972656, "logps/rejected": -408.75396728515625, "loss": 0.1068, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0898610353469849, "rewards/margins": 9.749977111816406, "rewards/rejected": -10.839839935302734, "step": 6970 }, { "epoch": 1.68, "learning_rate": 2.4447316812266e-07, "logits/chosen": -2.6729576587677, "logits/rejected": -2.591397523880005, "logps/chosen": -248.69216918945312, "logps/rejected": -242.4110870361328, "loss": 0.1096, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.16907243430614471, "rewards/margins": 6.555850028991699, "rewards/rejected": -6.724922180175781, "step": 6980 }, { "epoch": 1.68, "learning_rate": 2.4402745587448743e-07, "logits/chosen": -2.547302722930908, "logits/rejected": -2.5183403491973877, "logps/chosen": -277.13690185546875, "logps/rejected": -335.0595397949219, "loss": 0.076, "rewards/accuracies": 1.0, "rewards/chosen": -1.159125804901123, "rewards/margins": 5.9451985359191895, "rewards/rejected": -7.104323387145996, "step": 6990 }, { "epoch": 1.68, "learning_rate": 2.4358174362631483e-07, "logits/chosen": -2.517106771469116, "logits/rejected": -2.4098610877990723, "logps/chosen": -243.20498657226562, "logps/rejected": -336.9594421386719, "loss": 0.0656, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.07006768882274628, "rewards/margins": 8.30532169342041, "rewards/rejected": -8.375389099121094, "step": 7000 }, { "epoch": 1.69, "learning_rate": 2.4313603137814224e-07, "logits/chosen": -2.7634055614471436, "logits/rejected": -2.5951571464538574, "logps/chosen": -297.3189697265625, "logps/rejected": -261.8771057128906, "loss": 0.1157, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14124087989330292, "rewards/margins": 7.4747467041015625, "rewards/rejected": -7.6159868240356445, "step": 7010 }, { "epoch": 1.69, "learning_rate": 2.426903191299697e-07, "logits/chosen": -2.69368577003479, "logits/rejected": -2.563884735107422, "logps/chosen": -283.6263427734375, "logps/rejected": -298.4433288574219, "loss": 0.0947, "rewards/accuracies": 1.0, "rewards/chosen": -0.19579832255840302, "rewards/margins": 6.992488861083984, "rewards/rejected": -7.188286781311035, "step": 7020 }, { "epoch": 1.69, "learning_rate": 2.422446068817971e-07, "logits/chosen": -2.6995270252227783, "logits/rejected": -2.5794615745544434, "logps/chosen": -259.35845947265625, "logps/rejected": -298.73101806640625, "loss": 0.1126, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14753352105617523, "rewards/margins": 6.613396644592285, "rewards/rejected": -6.760930061340332, "step": 7030 }, { "epoch": 1.69, "learning_rate": 2.417988946336245e-07, "logits/chosen": -2.691283941268921, "logits/rejected": -2.634284496307373, "logps/chosen": -251.0098419189453, "logps/rejected": -373.2581481933594, "loss": 0.0714, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.1844494342803955, "rewards/margins": 10.074152946472168, "rewards/rejected": -8.889702796936035, "step": 7040 }, { "epoch": 1.7, "learning_rate": 2.4135318238545195e-07, "logits/chosen": -2.7262206077575684, "logits/rejected": -2.604710102081299, "logps/chosen": -303.147705078125, "logps/rejected": -384.01470947265625, "loss": 0.0542, "rewards/accuracies": 1.0, "rewards/chosen": -0.17617926001548767, "rewards/margins": 10.112824440002441, "rewards/rejected": -10.289003372192383, "step": 7050 }, { "epoch": 1.7, "learning_rate": 2.4090747013727936e-07, "logits/chosen": -2.4994661808013916, "logits/rejected": -2.5262341499328613, "logps/chosen": -326.6519775390625, "logps/rejected": -336.6358337402344, "loss": 0.0872, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.24213656783103943, "rewards/margins": 8.929125785827637, "rewards/rejected": -9.17126178741455, "step": 7060 }, { "epoch": 1.7, "learning_rate": 2.4046175788910676e-07, "logits/chosen": -2.6846208572387695, "logits/rejected": -2.664703845977783, "logps/chosen": -236.06387329101562, "logps/rejected": -355.14044189453125, "loss": 0.1302, "rewards/accuracies": 1.0, "rewards/chosen": -0.8785532712936401, "rewards/margins": 6.995070457458496, "rewards/rejected": -7.873623847961426, "step": 7070 }, { "epoch": 1.7, "learning_rate": 2.400160456409342e-07, "logits/chosen": -2.6251165866851807, "logits/rejected": -2.6238937377929688, "logps/chosen": -199.33859252929688, "logps/rejected": -321.19354248046875, "loss": 0.1684, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0854692459106445, "rewards/margins": 7.993255615234375, "rewards/rejected": -9.078723907470703, "step": 7080 }, { "epoch": 1.71, "learning_rate": 2.395703333927616e-07, "logits/chosen": -2.5831985473632812, "logits/rejected": -2.6258349418640137, "logps/chosen": -200.64434814453125, "logps/rejected": -322.10333251953125, "loss": 0.1271, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.27437877655029297, "rewards/margins": 9.108819961547852, "rewards/rejected": -8.834441184997559, "step": 7090 }, { "epoch": 1.71, "learning_rate": 2.39124621144589e-07, "logits/chosen": -2.6364359855651855, "logits/rejected": -2.613237142562866, "logps/chosen": -227.36788940429688, "logps/rejected": -310.9380187988281, "loss": 0.1224, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.7503639459609985, "rewards/margins": 9.57800579071045, "rewards/rejected": -8.827642440795898, "step": 7100 }, { "epoch": 1.71, "learning_rate": 2.386789088964165e-07, "logits/chosen": -2.6473875045776367, "logits/rejected": -2.614420175552368, "logps/chosen": -312.0506286621094, "logps/rejected": -355.6326904296875, "loss": 0.1323, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5325582027435303, "rewards/margins": 7.201295375823975, "rewards/rejected": -7.7338547706604, "step": 7110 }, { "epoch": 1.71, "learning_rate": 2.3823319664824388e-07, "logits/chosen": -2.4910478591918945, "logits/rejected": -2.57076096534729, "logps/chosen": -212.93893432617188, "logps/rejected": -295.931396484375, "loss": 0.078, "rewards/accuracies": 1.0, "rewards/chosen": -1.4131343364715576, "rewards/margins": 6.202882289886475, "rewards/rejected": -7.6160173416137695, "step": 7120 }, { "epoch": 1.72, "learning_rate": 2.3778748440007128e-07, "logits/chosen": -2.726280927658081, "logits/rejected": -2.5627434253692627, "logps/chosen": -317.63458251953125, "logps/rejected": -292.7439880371094, "loss": 0.1031, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6963149309158325, "rewards/margins": 7.364465236663818, "rewards/rejected": -8.06078052520752, "step": 7130 }, { "epoch": 1.72, "learning_rate": 2.373417721518987e-07, "logits/chosen": -2.5128579139709473, "logits/rejected": -2.5653254985809326, "logps/chosen": -226.4662628173828, "logps/rejected": -327.03021240234375, "loss": 0.0882, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8353655934333801, "rewards/margins": 8.105393409729004, "rewards/rejected": -8.94075870513916, "step": 7140 }, { "epoch": 1.72, "learning_rate": 2.3689605990372614e-07, "logits/chosen": -2.7434535026550293, "logits/rejected": -2.675881862640381, "logps/chosen": -264.567626953125, "logps/rejected": -302.3722839355469, "loss": 0.0793, "rewards/accuracies": 1.0, "rewards/chosen": -0.9892032742500305, "rewards/margins": 7.415769100189209, "rewards/rejected": -8.404972076416016, "step": 7150 }, { "epoch": 1.72, "learning_rate": 2.3645034765555354e-07, "logits/chosen": -2.623859405517578, "logits/rejected": -2.623645305633545, "logps/chosen": -193.98988342285156, "logps/rejected": -296.64398193359375, "loss": 0.0904, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2686735391616821, "rewards/margins": 8.979497909545898, "rewards/rejected": -10.248170852661133, "step": 7160 }, { "epoch": 1.73, "learning_rate": 2.36004635407381e-07, "logits/chosen": -2.633744478225708, "logits/rejected": -2.7162396907806396, "logps/chosen": -256.90264892578125, "logps/rejected": -354.1653747558594, "loss": 0.1246, "rewards/accuracies": 1.0, "rewards/chosen": -0.4706287384033203, "rewards/margins": 8.282906532287598, "rewards/rejected": -8.753534317016602, "step": 7170 }, { "epoch": 1.73, "learning_rate": 2.3555892315920843e-07, "logits/chosen": -2.6927480697631836, "logits/rejected": -2.629392623901367, "logps/chosen": -257.0445251464844, "logps/rejected": -257.18994140625, "loss": 0.1222, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.6668189764022827, "rewards/margins": 5.208324909210205, "rewards/rejected": -6.875143527984619, "step": 7180 }, { "epoch": 1.73, "learning_rate": 2.3511321091103583e-07, "logits/chosen": -2.730381488800049, "logits/rejected": -2.6737563610076904, "logps/chosen": -287.07574462890625, "logps/rejected": -437.490966796875, "loss": 0.1035, "rewards/accuracies": 1.0, "rewards/chosen": -1.7772750854492188, "rewards/margins": 8.56248664855957, "rewards/rejected": -10.339761734008789, "step": 7190 }, { "epoch": 1.73, "learning_rate": 2.3466749866286326e-07, "logits/chosen": -2.7791638374328613, "logits/rejected": -2.6489243507385254, "logps/chosen": -381.9056701660156, "logps/rejected": -393.8326110839844, "loss": 0.1095, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.6104615926742554, "rewards/margins": 9.136541366577148, "rewards/rejected": -9.747003555297852, "step": 7200 }, { "epoch": 1.73, "eval_logits/chosen": -2.339570999145508, "eval_logits/rejected": -2.298337459564209, "eval_logps/chosen": -243.2823028564453, "eval_logps/rejected": -259.2771911621094, "eval_loss": 0.5153447985649109, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -4.135477066040039, "eval_rewards/margins": 2.839163303375244, "eval_rewards/rejected": -6.974639892578125, "eval_runtime": 131.5205, "eval_samples_per_second": 23.996, "eval_steps_per_second": 0.38, "step": 7200 }, { "epoch": 1.74, "learning_rate": 2.3422178641469066e-07, "logits/chosen": -2.5644402503967285, "logits/rejected": -2.538790225982666, "logps/chosen": -243.1437225341797, "logps/rejected": -303.4709167480469, "loss": 0.0984, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9559003710746765, "rewards/margins": 7.72446346282959, "rewards/rejected": -8.680364608764648, "step": 7210 }, { "epoch": 1.74, "learning_rate": 2.337760741665181e-07, "logits/chosen": -2.632382869720459, "logits/rejected": -2.610016345977783, "logps/chosen": -311.9110412597656, "logps/rejected": -422.59027099609375, "loss": 0.0519, "rewards/accuracies": 1.0, "rewards/chosen": -0.15952453017234802, "rewards/margins": 9.718436241149902, "rewards/rejected": -9.877962112426758, "step": 7220 }, { "epoch": 1.74, "learning_rate": 2.3333036191834552e-07, "logits/chosen": -2.565765142440796, "logits/rejected": -2.528348445892334, "logps/chosen": -199.43502807617188, "logps/rejected": -240.23080444335938, "loss": 0.0994, "rewards/accuracies": 1.0, "rewards/chosen": -1.3454731702804565, "rewards/margins": 5.091964244842529, "rewards/rejected": -6.437438011169434, "step": 7230 }, { "epoch": 1.74, "learning_rate": 2.3288464967017293e-07, "logits/chosen": -2.720362663269043, "logits/rejected": -2.5842232704162598, "logps/chosen": -306.49310302734375, "logps/rejected": -400.3758544921875, "loss": 0.1315, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.2581801414489746, "rewards/margins": 6.43633508682251, "rewards/rejected": -8.694514274597168, "step": 7240 }, { "epoch": 1.74, "learning_rate": 2.3243893742200035e-07, "logits/chosen": -2.5583643913269043, "logits/rejected": -2.6254963874816895, "logps/chosen": -300.7047119140625, "logps/rejected": -331.8702392578125, "loss": 0.0998, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.4756503105163574, "rewards/margins": 4.999587059020996, "rewards/rejected": -7.475237846374512, "step": 7250 }, { "epoch": 1.75, "learning_rate": 2.3199322517382778e-07, "logits/chosen": -2.6709704399108887, "logits/rejected": -2.5990965366363525, "logps/chosen": -279.72833251953125, "logps/rejected": -345.9717712402344, "loss": 0.1014, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.2836154699325562, "rewards/margins": 6.974087715148926, "rewards/rejected": -8.257701873779297, "step": 7260 }, { "epoch": 1.75, "learning_rate": 2.315475129256552e-07, "logits/chosen": -2.6193602085113525, "logits/rejected": -2.67102313041687, "logps/chosen": -227.65225219726562, "logps/rejected": -347.7125549316406, "loss": 0.1003, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.2079720497131348, "rewards/margins": 6.406277656555176, "rewards/rejected": -8.614249229431152, "step": 7270 }, { "epoch": 1.75, "learning_rate": 2.3110180067748262e-07, "logits/chosen": -2.512887954711914, "logits/rejected": -2.5485825538635254, "logps/chosen": -229.8495330810547, "logps/rejected": -265.9822692871094, "loss": 0.1176, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.186687469482422, "rewards/margins": 5.730704307556152, "rewards/rejected": -7.917391777038574, "step": 7280 }, { "epoch": 1.75, "learning_rate": 2.3065608842931002e-07, "logits/chosen": -2.5849320888519287, "logits/rejected": -2.51088285446167, "logps/chosen": -391.68157958984375, "logps/rejected": -333.1160583496094, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": -2.3726003170013428, "rewards/margins": 6.668314456939697, "rewards/rejected": -9.040914535522461, "step": 7290 }, { "epoch": 1.76, "learning_rate": 2.3021037618113745e-07, "logits/chosen": -2.289815664291382, "logits/rejected": -2.3414039611816406, "logps/chosen": -380.6048889160156, "logps/rejected": -349.00054931640625, "loss": 0.0835, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1775258779525757, "rewards/margins": 8.12243938446045, "rewards/rejected": -9.299964904785156, "step": 7300 }, { "epoch": 1.76, "learning_rate": 2.2976466393296488e-07, "logits/chosen": -2.578829050064087, "logits/rejected": -2.5659432411193848, "logps/chosen": -239.71578979492188, "logps/rejected": -303.98968505859375, "loss": 0.1334, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.5287086963653564, "rewards/margins": 8.155741691589355, "rewards/rejected": -9.684450149536133, "step": 7310 }, { "epoch": 1.76, "learning_rate": 2.2931895168479228e-07, "logits/chosen": -2.390089511871338, "logits/rejected": -2.44716215133667, "logps/chosen": -283.4778747558594, "logps/rejected": -364.2710876464844, "loss": 0.1251, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.3863930702209473, "rewards/margins": 8.288941383361816, "rewards/rejected": -11.675333976745605, "step": 7320 }, { "epoch": 1.76, "learning_rate": 2.288732394366197e-07, "logits/chosen": -2.642254114151001, "logits/rejected": -2.433981418609619, "logps/chosen": -242.18017578125, "logps/rejected": -267.94879150390625, "loss": 0.0956, "rewards/accuracies": 1.0, "rewards/chosen": -1.964556097984314, "rewards/margins": 7.220114707946777, "rewards/rejected": -9.184670448303223, "step": 7330 }, { "epoch": 1.77, "learning_rate": 2.2842752718844714e-07, "logits/chosen": -2.7937426567077637, "logits/rejected": -2.764474868774414, "logps/chosen": -302.13031005859375, "logps/rejected": -317.9931945800781, "loss": 0.1392, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.39774584770202637, "rewards/margins": 8.073554039001465, "rewards/rejected": -8.47130012512207, "step": 7340 }, { "epoch": 1.77, "learning_rate": 2.2798181494027454e-07, "logits/chosen": -2.374725341796875, "logits/rejected": -2.4418551921844482, "logps/chosen": -278.1357116699219, "logps/rejected": -299.0234375, "loss": 0.2138, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.0053553581237793, "rewards/margins": 5.862728118896484, "rewards/rejected": -7.868082523345947, "step": 7350 }, { "epoch": 1.77, "learning_rate": 2.2753610269210197e-07, "logits/chosen": -2.5299973487854004, "logits/rejected": -2.495958089828491, "logps/chosen": -232.66177368164062, "logps/rejected": -313.27581787109375, "loss": 0.0797, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3753086030483246, "rewards/margins": 8.323644638061523, "rewards/rejected": -8.698953628540039, "step": 7360 }, { "epoch": 1.77, "learning_rate": 2.2709039044392937e-07, "logits/chosen": -2.585207223892212, "logits/rejected": -2.3481059074401855, "logps/chosen": -237.22738647460938, "logps/rejected": -311.93524169921875, "loss": 0.0797, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7202649116516113, "rewards/margins": 8.996984481811523, "rewards/rejected": -9.717249870300293, "step": 7370 }, { "epoch": 1.78, "learning_rate": 2.266446781957568e-07, "logits/chosen": -2.576906442642212, "logits/rejected": -2.4248411655426025, "logps/chosen": -263.6038513183594, "logps/rejected": -263.1639709472656, "loss": 0.1163, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6332124471664429, "rewards/margins": 7.621026515960693, "rewards/rejected": -8.254239082336426, "step": 7380 }, { "epoch": 1.78, "learning_rate": 2.2619896594758423e-07, "logits/chosen": -2.674010992050171, "logits/rejected": -2.5529427528381348, "logps/chosen": -367.4625549316406, "logps/rejected": -397.8846740722656, "loss": 0.0956, "rewards/accuracies": 1.0, "rewards/chosen": 1.2886005640029907, "rewards/margins": 10.786130905151367, "rewards/rejected": -9.497529983520508, "step": 7390 }, { "epoch": 1.78, "learning_rate": 2.2575325369941164e-07, "logits/chosen": -2.471353054046631, "logits/rejected": -2.4338858127593994, "logps/chosen": -263.5669860839844, "logps/rejected": -297.22998046875, "loss": 0.1119, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.2962851524353027, "rewards/margins": 5.4361186027526855, "rewards/rejected": -7.732403755187988, "step": 7400 }, { "epoch": 1.78, "learning_rate": 2.2530754145123907e-07, "logits/chosen": -2.433509349822998, "logits/rejected": -2.3731446266174316, "logps/chosen": -196.04039001464844, "logps/rejected": -270.1302490234375, "loss": 0.1082, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.5012108087539673, "rewards/margins": 5.40764856338501, "rewards/rejected": -6.9088592529296875, "step": 7410 }, { "epoch": 1.79, "learning_rate": 2.248618292030665e-07, "logits/chosen": -2.48002028465271, "logits/rejected": -2.403534173965454, "logps/chosen": -285.1266174316406, "logps/rejected": -304.3287048339844, "loss": 0.0949, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.2731415033340454, "rewards/margins": 5.922009468078613, "rewards/rejected": -7.195151329040527, "step": 7420 }, { "epoch": 1.79, "learning_rate": 2.244161169548939e-07, "logits/chosen": -2.6067662239074707, "logits/rejected": -2.5508029460906982, "logps/chosen": -210.6913299560547, "logps/rejected": -247.1603240966797, "loss": 0.0915, "rewards/accuracies": 1.0, "rewards/chosen": -0.37648916244506836, "rewards/margins": 7.376882076263428, "rewards/rejected": -7.7533721923828125, "step": 7430 }, { "epoch": 1.79, "learning_rate": 2.2397040470672133e-07, "logits/chosen": -2.5132429599761963, "logits/rejected": -2.358344554901123, "logps/chosen": -206.2704315185547, "logps/rejected": -258.94976806640625, "loss": 0.1005, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.4994385242462158, "rewards/margins": 6.546236515045166, "rewards/rejected": -8.045675277709961, "step": 7440 }, { "epoch": 1.79, "learning_rate": 2.2352469245854873e-07, "logits/chosen": -2.5345187187194824, "logits/rejected": -2.507821559906006, "logps/chosen": -227.01364135742188, "logps/rejected": -280.1637268066406, "loss": 0.1263, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0459386110305786, "rewards/margins": 6.257880210876465, "rewards/rejected": -7.303818702697754, "step": 7450 }, { "epoch": 1.8, "learning_rate": 2.2307898021037616e-07, "logits/chosen": -2.5831310749053955, "logits/rejected": -2.413195848464966, "logps/chosen": -259.00787353515625, "logps/rejected": -315.9976501464844, "loss": 0.1144, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.8815778493881226, "rewards/margins": 8.863349914550781, "rewards/rejected": -7.981772422790527, "step": 7460 }, { "epoch": 1.8, "learning_rate": 2.226332679622036e-07, "logits/chosen": -2.3800718784332275, "logits/rejected": -2.3832552433013916, "logps/chosen": -300.4321594238281, "logps/rejected": -449.7889709472656, "loss": 0.0689, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7963374257087708, "rewards/margins": 8.301736831665039, "rewards/rejected": -9.098074913024902, "step": 7470 }, { "epoch": 1.8, "learning_rate": 2.22187555714031e-07, "logits/chosen": -2.6018245220184326, "logits/rejected": -2.4382550716400146, "logps/chosen": -271.9831848144531, "logps/rejected": -292.10748291015625, "loss": 0.0604, "rewards/accuracies": 1.0, "rewards/chosen": 0.8728191256523132, "rewards/margins": 9.360101699829102, "rewards/rejected": -8.487282752990723, "step": 7480 }, { "epoch": 1.8, "learning_rate": 2.2174184346585842e-07, "logits/chosen": -2.417644739151001, "logits/rejected": -2.4956912994384766, "logps/chosen": -223.18490600585938, "logps/rejected": -338.0353088378906, "loss": 0.0604, "rewards/accuracies": 1.0, "rewards/chosen": -0.7294325828552246, "rewards/margins": 7.016575813293457, "rewards/rejected": -7.746008396148682, "step": 7490 }, { "epoch": 1.81, "learning_rate": 2.2129613121768585e-07, "logits/chosen": -2.5275511741638184, "logits/rejected": -2.478214979171753, "logps/chosen": -226.3917999267578, "logps/rejected": -317.48138427734375, "loss": 0.0789, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.673362374305725, "rewards/margins": 8.482213973999023, "rewards/rejected": -10.1555757522583, "step": 7500 }, { "epoch": 1.81, "learning_rate": 2.2085041896951328e-07, "logits/chosen": -2.5859923362731934, "logits/rejected": -2.473111629486084, "logps/chosen": -231.0122528076172, "logps/rejected": -317.8660583496094, "loss": 0.1009, "rewards/accuracies": 1.0, "rewards/chosen": 0.7452360391616821, "rewards/margins": 10.408421516418457, "rewards/rejected": -9.663185119628906, "step": 7510 }, { "epoch": 1.81, "learning_rate": 2.204047067213407e-07, "logits/chosen": -2.47501540184021, "logits/rejected": -2.3478808403015137, "logps/chosen": -265.54730224609375, "logps/rejected": -473.728271484375, "loss": 0.0449, "rewards/accuracies": 1.0, "rewards/chosen": 0.27362900972366333, "rewards/margins": 15.523233413696289, "rewards/rejected": -15.249605178833008, "step": 7520 }, { "epoch": 1.81, "learning_rate": 2.199589944731681e-07, "logits/chosen": -2.5425233840942383, "logits/rejected": -2.6027400493621826, "logps/chosen": -201.32948303222656, "logps/rejected": -348.94110107421875, "loss": 0.1444, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.6131919622421265, "rewards/margins": 7.723005771636963, "rewards/rejected": -8.336198806762695, "step": 7530 }, { "epoch": 1.81, "learning_rate": 2.1951328222499554e-07, "logits/chosen": -2.5663869380950928, "logits/rejected": -2.2703213691711426, "logps/chosen": -276.5305480957031, "logps/rejected": -347.3857421875, "loss": 0.1505, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.16045208275318146, "rewards/margins": 7.604345798492432, "rewards/rejected": -7.4438934326171875, "step": 7540 }, { "epoch": 1.82, "learning_rate": 2.1906756997682297e-07, "logits/chosen": -2.595304489135742, "logits/rejected": -2.607177257537842, "logps/chosen": -234.88998413085938, "logps/rejected": -311.6211853027344, "loss": 0.1163, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.3873317241668701, "rewards/margins": 7.996423244476318, "rewards/rejected": -9.383755683898926, "step": 7550 }, { "epoch": 1.82, "learning_rate": 2.1862185772865037e-07, "logits/chosen": -2.367216110229492, "logits/rejected": -2.351743221282959, "logps/chosen": -224.83804321289062, "logps/rejected": -285.729248046875, "loss": 0.08, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.2346243858337402, "rewards/margins": 6.1820759773254395, "rewards/rejected": -8.41670036315918, "step": 7560 }, { "epoch": 1.82, "learning_rate": 2.181761454804778e-07, "logits/chosen": -2.6036853790283203, "logits/rejected": -2.607869863510132, "logps/chosen": -227.0845489501953, "logps/rejected": -282.7878112792969, "loss": 0.0808, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.383934736251831, "rewards/margins": 6.9649834632873535, "rewards/rejected": -8.348918914794922, "step": 7570 }, { "epoch": 1.82, "learning_rate": 2.1773043323230523e-07, "logits/chosen": -2.512070894241333, "logits/rejected": -2.538114070892334, "logps/chosen": -202.14749145507812, "logps/rejected": -232.96035766601562, "loss": 0.1065, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6353708505630493, "rewards/margins": 5.990879535675049, "rewards/rejected": -6.626250267028809, "step": 7580 }, { "epoch": 1.83, "learning_rate": 2.1728472098413263e-07, "logits/chosen": -2.6221089363098145, "logits/rejected": -2.503613233566284, "logps/chosen": -327.0585021972656, "logps/rejected": -374.91986083984375, "loss": 0.1566, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.7350888848304749, "rewards/margins": 8.144794464111328, "rewards/rejected": -7.40970516204834, "step": 7590 }, { "epoch": 1.83, "learning_rate": 2.1683900873596006e-07, "logits/chosen": -2.4637551307678223, "logits/rejected": -2.480011463165283, "logps/chosen": -165.88694763183594, "logps/rejected": -297.799560546875, "loss": 0.1515, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0971057415008545, "rewards/margins": 6.92633581161499, "rewards/rejected": -8.023443222045898, "step": 7600 }, { "epoch": 1.83, "eval_logits/chosen": -2.289604425430298, "eval_logits/rejected": -2.2513480186462402, "eval_logps/chosen": -246.97955322265625, "eval_logps/rejected": -263.99462890625, "eval_loss": 0.5241956114768982, "eval_rewards/accuracies": 0.6625000238418579, "eval_rewards/chosen": -4.505197525024414, "eval_rewards/margins": 2.9411861896514893, "eval_rewards/rejected": -7.446383476257324, "eval_runtime": 131.2772, "eval_samples_per_second": 24.041, "eval_steps_per_second": 0.381, "step": 7600 }, { "epoch": 1.83, "learning_rate": 2.163932964877875e-07, "logits/chosen": -2.4689695835113525, "logits/rejected": -2.4437785148620605, "logps/chosen": -217.9733123779297, "logps/rejected": -274.4111022949219, "loss": 0.0901, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.3212761878967285, "rewards/margins": 7.479835510253906, "rewards/rejected": -8.801111221313477, "step": 7610 }, { "epoch": 1.83, "learning_rate": 2.159475842396149e-07, "logits/chosen": -2.4899864196777344, "logits/rejected": -2.4517159461975098, "logps/chosen": -279.220458984375, "logps/rejected": -336.27557373046875, "loss": 0.131, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.2590699195861816, "rewards/margins": 7.154644966125488, "rewards/rejected": -9.413714408874512, "step": 7620 }, { "epoch": 1.84, "learning_rate": 2.1550187199144233e-07, "logits/chosen": -2.489950180053711, "logits/rejected": -2.5148751735687256, "logps/chosen": -272.50115966796875, "logps/rejected": -339.0401611328125, "loss": 0.0984, "rewards/accuracies": 1.0, "rewards/chosen": -0.8389506340026855, "rewards/margins": 7.193517208099365, "rewards/rejected": -8.032468795776367, "step": 7630 }, { "epoch": 1.84, "learning_rate": 2.1505615974326973e-07, "logits/chosen": -2.546099901199341, "logits/rejected": -2.6154778003692627, "logps/chosen": -234.4697723388672, "logps/rejected": -371.752197265625, "loss": 0.1064, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.3317568302154541, "rewards/margins": 9.722246170043945, "rewards/rejected": -9.390490531921387, "step": 7640 }, { "epoch": 1.84, "learning_rate": 2.1461044749509716e-07, "logits/chosen": -2.612776279449463, "logits/rejected": -2.5559134483337402, "logps/chosen": -285.86517333984375, "logps/rejected": -401.9395446777344, "loss": 0.0716, "rewards/accuracies": 1.0, "rewards/chosen": -0.22804859280586243, "rewards/margins": 9.717254638671875, "rewards/rejected": -9.945302963256836, "step": 7650 }, { "epoch": 1.84, "learning_rate": 2.141647352469246e-07, "logits/chosen": -2.542107343673706, "logits/rejected": -2.551405429840088, "logps/chosen": -289.79815673828125, "logps/rejected": -415.26318359375, "loss": 0.2417, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.5029503107070923, "rewards/margins": 8.917051315307617, "rewards/rejected": -8.414101600646973, "step": 7660 }, { "epoch": 1.85, "learning_rate": 2.13719022998752e-07, "logits/chosen": -2.834725856781006, "logits/rejected": -2.566082715988159, "logps/chosen": -389.8315734863281, "logps/rejected": -338.5687561035156, "loss": 0.1113, "rewards/accuracies": 1.0, "rewards/chosen": -0.2165495604276657, "rewards/margins": 8.31263542175293, "rewards/rejected": -8.529186248779297, "step": 7670 }, { "epoch": 1.85, "learning_rate": 2.1327331075057942e-07, "logits/chosen": -2.5547666549682617, "logits/rejected": -2.485154628753662, "logps/chosen": -315.56103515625, "logps/rejected": -315.5398864746094, "loss": 0.0542, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5800116658210754, "rewards/margins": 7.743584632873535, "rewards/rejected": -8.323596000671387, "step": 7680 }, { "epoch": 1.85, "learning_rate": 2.1282759850240685e-07, "logits/chosen": -2.613180160522461, "logits/rejected": -2.571366548538208, "logps/chosen": -224.80276489257812, "logps/rejected": -281.1871032714844, "loss": 0.1117, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1531640738248825, "rewards/margins": 7.546097755432129, "rewards/rejected": -7.699261665344238, "step": 7690 }, { "epoch": 1.85, "learning_rate": 2.1238188625423425e-07, "logits/chosen": -2.445774555206299, "logits/rejected": -2.4282386302948, "logps/chosen": -205.8231658935547, "logps/rejected": -331.0657653808594, "loss": 0.1229, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.3187758922576904, "rewards/margins": 8.039289474487305, "rewards/rejected": -10.358064651489258, "step": 7700 }, { "epoch": 1.86, "learning_rate": 2.1193617400606168e-07, "logits/chosen": -2.6719584465026855, "logits/rejected": -2.55678653717041, "logps/chosen": -261.1755676269531, "logps/rejected": -225.02392578125, "loss": 0.1014, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.0180306434631348, "rewards/margins": 4.9081950187683105, "rewards/rejected": -6.9262261390686035, "step": 7710 }, { "epoch": 1.86, "learning_rate": 2.1149046175788908e-07, "logits/chosen": -2.4505276679992676, "logits/rejected": -2.4282736778259277, "logps/chosen": -238.9077911376953, "logps/rejected": -333.31561279296875, "loss": 0.0549, "rewards/accuracies": 1.0, "rewards/chosen": -1.1426715850830078, "rewards/margins": 6.513665199279785, "rewards/rejected": -7.656336784362793, "step": 7720 }, { "epoch": 1.86, "learning_rate": 2.110447495097165e-07, "logits/chosen": -2.54573392868042, "logits/rejected": -2.5105884075164795, "logps/chosen": -202.3138427734375, "logps/rejected": -314.7386779785156, "loss": 0.0793, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.504429578781128, "rewards/margins": 8.053789138793945, "rewards/rejected": -9.558218002319336, "step": 7730 }, { "epoch": 1.86, "learning_rate": 2.1059903726154394e-07, "logits/chosen": -2.7421913146972656, "logits/rejected": -2.704651117324829, "logps/chosen": -298.920654296875, "logps/rejected": -291.2439270019531, "loss": 0.1068, "rewards/accuracies": 1.0, "rewards/chosen": -2.1864571571350098, "rewards/margins": 5.482812881469727, "rewards/rejected": -7.6692705154418945, "step": 7740 }, { "epoch": 1.87, "learning_rate": 2.1015332501337135e-07, "logits/chosen": -2.760547399520874, "logits/rejected": -2.7504758834838867, "logps/chosen": -291.6126403808594, "logps/rejected": -340.95819091796875, "loss": 0.1171, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1536940038204193, "rewards/margins": 7.864290714263916, "rewards/rejected": -8.017983436584473, "step": 7750 }, { "epoch": 1.87, "learning_rate": 2.0970761276519877e-07, "logits/chosen": -2.6722984313964844, "logits/rejected": -2.6949880123138428, "logps/chosen": -296.4515686035156, "logps/rejected": -405.2401123046875, "loss": 0.0943, "rewards/accuracies": 1.0, "rewards/chosen": -0.8006378412246704, "rewards/margins": 8.26314926147461, "rewards/rejected": -9.063787460327148, "step": 7760 }, { "epoch": 1.87, "learning_rate": 2.092619005170262e-07, "logits/chosen": -2.6990954875946045, "logits/rejected": -2.689490556716919, "logps/chosen": -374.5258483886719, "logps/rejected": -390.0920104980469, "loss": 0.0524, "rewards/accuracies": 1.0, "rewards/chosen": -0.060518957674503326, "rewards/margins": 9.205659866333008, "rewards/rejected": -9.266180038452148, "step": 7770 }, { "epoch": 1.87, "learning_rate": 2.088161882688536e-07, "logits/chosen": -2.734041690826416, "logits/rejected": -2.6701254844665527, "logps/chosen": -272.4217224121094, "logps/rejected": -321.0535583496094, "loss": 0.0925, "rewards/accuracies": 1.0, "rewards/chosen": -1.9945024251937866, "rewards/margins": 6.5279364585876465, "rewards/rejected": -8.522439002990723, "step": 7780 }, { "epoch": 1.87, "learning_rate": 2.0837047602068104e-07, "logits/chosen": -2.759873628616333, "logits/rejected": -2.594285488128662, "logps/chosen": -309.46551513671875, "logps/rejected": -313.20831298828125, "loss": 0.0754, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3565568923950195, "rewards/margins": 6.081301689147949, "rewards/rejected": -7.437858581542969, "step": 7790 }, { "epoch": 1.88, "learning_rate": 2.0792476377250844e-07, "logits/chosen": -2.5614938735961914, "logits/rejected": -2.6000454425811768, "logps/chosen": -335.32269287109375, "logps/rejected": -450.3589782714844, "loss": 0.092, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.614913284778595, "rewards/margins": 10.81617546081543, "rewards/rejected": -11.431089401245117, "step": 7800 }, { "epoch": 1.88, "learning_rate": 2.0747905152433587e-07, "logits/chosen": -2.6038460731506348, "logits/rejected": -2.6459014415740967, "logps/chosen": -275.76348876953125, "logps/rejected": -297.15264892578125, "loss": 0.1195, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.7798149585723877, "rewards/margins": 4.796724796295166, "rewards/rejected": -8.576539039611816, "step": 7810 }, { "epoch": 1.88, "learning_rate": 2.070333392761633e-07, "logits/chosen": -2.5913760662078857, "logits/rejected": -2.495664596557617, "logps/chosen": -226.21505737304688, "logps/rejected": -314.0357360839844, "loss": 0.1211, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9873256683349609, "rewards/margins": 8.476485252380371, "rewards/rejected": -9.463809967041016, "step": 7820 }, { "epoch": 1.88, "learning_rate": 2.065876270279907e-07, "logits/chosen": -2.530057191848755, "logits/rejected": -2.4276559352874756, "logps/chosen": -285.4584655761719, "logps/rejected": -325.4349365234375, "loss": 0.0661, "rewards/accuracies": 1.0, "rewards/chosen": -2.640655279159546, "rewards/margins": 7.890588283538818, "rewards/rejected": -10.531244277954102, "step": 7830 }, { "epoch": 1.89, "learning_rate": 2.0614191477981813e-07, "logits/chosen": -2.2655327320098877, "logits/rejected": -2.3299734592437744, "logps/chosen": -259.01690673828125, "logps/rejected": -343.3759765625, "loss": 0.0898, "rewards/accuracies": 1.0, "rewards/chosen": -1.0912749767303467, "rewards/margins": 10.160661697387695, "rewards/rejected": -11.251936912536621, "step": 7840 }, { "epoch": 1.89, "learning_rate": 2.0569620253164559e-07, "logits/chosen": -2.5364832878112793, "logits/rejected": -2.5112829208374023, "logps/chosen": -181.44210815429688, "logps/rejected": -203.41616821289062, "loss": 0.1258, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.22523832321167, "rewards/margins": 5.641473293304443, "rewards/rejected": -7.866711616516113, "step": 7850 }, { "epoch": 1.89, "learning_rate": 2.05250490283473e-07, "logits/chosen": -2.453578472137451, "logits/rejected": -2.540310859680176, "logps/chosen": -306.55560302734375, "logps/rejected": -313.5611267089844, "loss": 0.0823, "rewards/accuracies": 1.0, "rewards/chosen": -1.584228515625, "rewards/margins": 7.540985107421875, "rewards/rejected": -9.125212669372559, "step": 7860 }, { "epoch": 1.89, "learning_rate": 2.0480477803530042e-07, "logits/chosen": -2.7184207439422607, "logits/rejected": -2.6624531745910645, "logps/chosen": -398.5353698730469, "logps/rejected": -377.66168212890625, "loss": 0.1111, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0141246318817139, "rewards/margins": 7.387277126312256, "rewards/rejected": -8.401402473449707, "step": 7870 }, { "epoch": 1.9, "learning_rate": 2.0435906578712782e-07, "logits/chosen": -2.646336078643799, "logits/rejected": -2.4954798221588135, "logps/chosen": -244.2439727783203, "logps/rejected": -425.41766357421875, "loss": 0.1331, "rewards/accuracies": 1.0, "rewards/chosen": 0.2392260581254959, "rewards/margins": 12.841867446899414, "rewards/rejected": -12.602640151977539, "step": 7880 }, { "epoch": 1.9, "learning_rate": 2.0391335353895525e-07, "logits/chosen": -2.645211696624756, "logits/rejected": -2.5931031703948975, "logps/chosen": -283.30535888671875, "logps/rejected": -359.7178649902344, "loss": 0.0628, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.39487886428833, "rewards/margins": 7.769004821777344, "rewards/rejected": -10.163884162902832, "step": 7890 }, { "epoch": 1.9, "learning_rate": 2.0346764129078268e-07, "logits/chosen": -2.857659101486206, "logits/rejected": -2.7472076416015625, "logps/chosen": -340.81488037109375, "logps/rejected": -340.79730224609375, "loss": 0.0905, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2454087734222412, "rewards/margins": 6.279747009277344, "rewards/rejected": -7.525155544281006, "step": 7900 }, { "epoch": 1.9, "learning_rate": 2.0302192904261008e-07, "logits/chosen": -2.508478879928589, "logits/rejected": -2.558784246444702, "logps/chosen": -283.2003479003906, "logps/rejected": -312.26715087890625, "loss": 0.1272, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7378164529800415, "rewards/margins": 8.038057327270508, "rewards/rejected": -8.775873184204102, "step": 7910 }, { "epoch": 1.91, "learning_rate": 2.025762167944375e-07, "logits/chosen": -2.6211769580841064, "logits/rejected": -2.499763011932373, "logps/chosen": -341.9508361816406, "logps/rejected": -305.7552795410156, "loss": 0.0431, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.5438544750213623, "rewards/margins": 8.497499465942383, "rewards/rejected": -10.041353225708008, "step": 7920 }, { "epoch": 1.91, "learning_rate": 2.0213050454626494e-07, "logits/chosen": -2.4751362800598145, "logits/rejected": -2.4390580654144287, "logps/chosen": -300.8005065917969, "logps/rejected": -291.78277587890625, "loss": 0.0879, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7827902436256409, "rewards/margins": 7.964527130126953, "rewards/rejected": -8.747318267822266, "step": 7930 }, { "epoch": 1.91, "learning_rate": 2.0168479229809234e-07, "logits/chosen": -2.4628098011016846, "logits/rejected": -2.4127678871154785, "logps/chosen": -173.89230346679688, "logps/rejected": -211.19784545898438, "loss": 0.0951, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.949373483657837, "rewards/margins": 6.480429172515869, "rewards/rejected": -8.429800987243652, "step": 7940 }, { "epoch": 1.91, "learning_rate": 2.0123908004991977e-07, "logits/chosen": -2.6525394916534424, "logits/rejected": -2.556516408920288, "logps/chosen": -341.7573547363281, "logps/rejected": -394.6011657714844, "loss": 0.0849, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8995146751403809, "rewards/margins": 9.777068138122559, "rewards/rejected": -10.676582336425781, "step": 7950 }, { "epoch": 1.92, "learning_rate": 2.0079336780174718e-07, "logits/chosen": -2.611630916595459, "logits/rejected": -2.531885862350464, "logps/chosen": -248.10647583007812, "logps/rejected": -362.81219482421875, "loss": 0.1108, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.161255121231079, "rewards/margins": 6.243734836578369, "rewards/rejected": -8.404989242553711, "step": 7960 }, { "epoch": 1.92, "learning_rate": 2.003476555535746e-07, "logits/chosen": -2.427172899246216, "logits/rejected": -2.468855381011963, "logps/chosen": -229.8157958984375, "logps/rejected": -236.3346405029297, "loss": 0.1272, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.4341981410980225, "rewards/margins": 6.164163112640381, "rewards/rejected": -7.598361015319824, "step": 7970 }, { "epoch": 1.92, "learning_rate": 1.9990194330540203e-07, "logits/chosen": -2.526604413986206, "logits/rejected": -2.4939637184143066, "logps/chosen": -400.15972900390625, "logps/rejected": -459.98602294921875, "loss": 0.0882, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.4898955225944519, "rewards/margins": 11.614582061767578, "rewards/rejected": -11.124686241149902, "step": 7980 }, { "epoch": 1.92, "learning_rate": 1.9945623105722944e-07, "logits/chosen": -2.436354160308838, "logits/rejected": -2.392300844192505, "logps/chosen": -187.17396545410156, "logps/rejected": -299.4290771484375, "loss": 0.1234, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.2302552461624146, "rewards/margins": 9.613070487976074, "rewards/rejected": -10.843326568603516, "step": 7990 }, { "epoch": 1.93, "learning_rate": 1.9901051880905687e-07, "logits/chosen": -2.5178189277648926, "logits/rejected": -2.3450796604156494, "logps/chosen": -333.3482360839844, "logps/rejected": -289.17437744140625, "loss": 0.1152, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.9208691120147705, "rewards/margins": 7.058518886566162, "rewards/rejected": -8.979388236999512, "step": 8000 }, { "epoch": 1.93, "eval_logits/chosen": -2.3184802532196045, "eval_logits/rejected": -2.2822105884552, "eval_logps/chosen": -247.2084197998047, "eval_logps/rejected": -265.16278076171875, "eval_loss": 0.5280002951622009, "eval_rewards/accuracies": 0.6825000047683716, "eval_rewards/chosen": -4.528088569641113, "eval_rewards/margins": 3.0351107120513916, "eval_rewards/rejected": -7.563199996948242, "eval_runtime": 131.0428, "eval_samples_per_second": 24.084, "eval_steps_per_second": 0.382, "step": 8000 }, { "epoch": 1.93, "learning_rate": 1.985648065608843e-07, "logits/chosen": -2.470599889755249, "logits/rejected": -2.4762444496154785, "logps/chosen": -343.68865966796875, "logps/rejected": -401.91668701171875, "loss": 0.1266, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3179447650909424, "rewards/margins": 8.005379676818848, "rewards/rejected": -9.323324203491211, "step": 8010 }, { "epoch": 1.93, "learning_rate": 1.981190943127117e-07, "logits/chosen": -2.6357977390289307, "logits/rejected": -2.570206880569458, "logps/chosen": -381.3616638183594, "logps/rejected": -264.6903381347656, "loss": 0.0495, "rewards/accuracies": 1.0, "rewards/chosen": -0.46273738145828247, "rewards/margins": 6.4893598556518555, "rewards/rejected": -6.952097415924072, "step": 8020 }, { "epoch": 1.93, "learning_rate": 1.9767338206453913e-07, "logits/chosen": -2.45924711227417, "logits/rejected": -2.4058728218078613, "logps/chosen": -238.2863006591797, "logps/rejected": -353.55218505859375, "loss": 0.1005, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.26092714071273804, "rewards/margins": 10.053898811340332, "rewards/rejected": -10.314826011657715, "step": 8030 }, { "epoch": 1.94, "learning_rate": 1.9722766981636653e-07, "logits/chosen": -2.3735110759735107, "logits/rejected": -2.41943621635437, "logps/chosen": -296.28955078125, "logps/rejected": -335.2906188964844, "loss": 0.1505, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4181472063064575, "rewards/margins": 6.552776336669922, "rewards/rejected": -7.97092342376709, "step": 8040 }, { "epoch": 1.94, "learning_rate": 1.9678195756819396e-07, "logits/chosen": -2.6774322986602783, "logits/rejected": -2.6747498512268066, "logps/chosen": -339.8834533691406, "logps/rejected": -366.2880554199219, "loss": 0.1936, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.47035661339759827, "rewards/margins": 8.600381851196289, "rewards/rejected": -8.130025863647461, "step": 8050 }, { "epoch": 1.94, "learning_rate": 1.963362453200214e-07, "logits/chosen": -2.3787899017333984, "logits/rejected": -2.351062774658203, "logps/chosen": -216.5155792236328, "logps/rejected": -299.8763732910156, "loss": 0.1035, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.026233578100800514, "rewards/margins": 8.369463920593262, "rewards/rejected": -8.343230247497559, "step": 8060 }, { "epoch": 1.94, "learning_rate": 1.958905330718488e-07, "logits/chosen": -2.3866686820983887, "logits/rejected": -2.2519049644470215, "logps/chosen": -160.90786743164062, "logps/rejected": -215.9598388671875, "loss": 0.0546, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.393612265586853, "rewards/margins": 5.775225639343262, "rewards/rejected": -6.168837547302246, "step": 8070 }, { "epoch": 1.94, "learning_rate": 1.9544482082367622e-07, "logits/chosen": -2.560020923614502, "logits/rejected": -2.4744629859924316, "logps/chosen": -302.98248291015625, "logps/rejected": -415.14093017578125, "loss": 0.1147, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.344792127609253, "rewards/margins": 7.479535102844238, "rewards/rejected": -8.82432746887207, "step": 8080 }, { "epoch": 1.95, "learning_rate": 1.9499910857550365e-07, "logits/chosen": -2.408642292022705, "logits/rejected": -2.4067068099975586, "logps/chosen": -286.95855712890625, "logps/rejected": -325.5104064941406, "loss": 0.0595, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.48605671525001526, "rewards/margins": 7.812626838684082, "rewards/rejected": -7.3265700340271, "step": 8090 }, { "epoch": 1.95, "learning_rate": 1.9455339632733105e-07, "logits/chosen": -2.468109130859375, "logits/rejected": -2.3929147720336914, "logps/chosen": -226.5298309326172, "logps/rejected": -315.2115173339844, "loss": 0.0785, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.891699194908142, "rewards/margins": 7.080783843994141, "rewards/rejected": -8.97248363494873, "step": 8100 }, { "epoch": 1.95, "learning_rate": 1.9410768407915848e-07, "logits/chosen": -2.4556844234466553, "logits/rejected": -2.4953293800354004, "logps/chosen": -217.6414031982422, "logps/rejected": -328.6346740722656, "loss": 0.114, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.800833523273468, "rewards/margins": 7.14700174331665, "rewards/rejected": -7.9478349685668945, "step": 8110 }, { "epoch": 1.95, "learning_rate": 1.9366197183098589e-07, "logits/chosen": -2.6187987327575684, "logits/rejected": -2.5955591201782227, "logps/chosen": -189.52865600585938, "logps/rejected": -362.27752685546875, "loss": 0.09, "rewards/accuracies": 1.0, "rewards/chosen": 0.21108195185661316, "rewards/margins": 11.469135284423828, "rewards/rejected": -11.258051872253418, "step": 8120 }, { "epoch": 1.96, "learning_rate": 1.9321625958281332e-07, "logits/chosen": -2.695669651031494, "logits/rejected": -2.5160300731658936, "logps/chosen": -243.835205078125, "logps/rejected": -310.9700012207031, "loss": 0.1008, "rewards/accuracies": 1.0, "rewards/chosen": 0.0049825431779026985, "rewards/margins": 9.520586967468262, "rewards/rejected": -9.515605926513672, "step": 8130 }, { "epoch": 1.96, "learning_rate": 1.9277054733464074e-07, "logits/chosen": -2.502044200897217, "logits/rejected": -2.4987478256225586, "logps/chosen": -233.9998321533203, "logps/rejected": -363.33001708984375, "loss": 0.0885, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.0708898305892944, "rewards/margins": 10.336236000061035, "rewards/rejected": -9.26534652709961, "step": 8140 }, { "epoch": 1.96, "learning_rate": 1.9232483508646815e-07, "logits/chosen": -2.521312952041626, "logits/rejected": -2.4630274772644043, "logps/chosen": -301.7711486816406, "logps/rejected": -283.7261657714844, "loss": 0.1093, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 2.0350699424743652, "rewards/margins": 10.709259986877441, "rewards/rejected": -8.674189567565918, "step": 8150 }, { "epoch": 1.96, "learning_rate": 1.9187912283829558e-07, "logits/chosen": -2.707197904586792, "logits/rejected": -2.4446969032287598, "logps/chosen": -221.1367950439453, "logps/rejected": -221.2825927734375, "loss": 0.1126, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3409653306007385, "rewards/margins": 5.505112648010254, "rewards/rejected": -5.846077919006348, "step": 8160 }, { "epoch": 1.97, "learning_rate": 1.91433410590123e-07, "logits/chosen": -2.6052212715148926, "logits/rejected": -2.6014931201934814, "logps/chosen": -276.29949951171875, "logps/rejected": -361.728759765625, "loss": 0.1135, "rewards/accuracies": 1.0, "rewards/chosen": 1.200992465019226, "rewards/margins": 8.519786834716797, "rewards/rejected": -7.318794250488281, "step": 8170 }, { "epoch": 1.97, "learning_rate": 1.909876983419504e-07, "logits/chosen": -2.450148105621338, "logits/rejected": -2.456796169281006, "logps/chosen": -244.544921875, "logps/rejected": -320.2922058105469, "loss": 0.1229, "rewards/accuracies": 1.0, "rewards/chosen": 0.8630601167678833, "rewards/margins": 10.178804397583008, "rewards/rejected": -9.315743446350098, "step": 8180 }, { "epoch": 1.97, "learning_rate": 1.9054198609377787e-07, "logits/chosen": -2.668520212173462, "logits/rejected": -2.452944755554199, "logps/chosen": -278.19049072265625, "logps/rejected": -261.8446350097656, "loss": 0.0965, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7886543869972229, "rewards/margins": 7.251919746398926, "rewards/rejected": -8.040573120117188, "step": 8190 }, { "epoch": 1.97, "learning_rate": 1.900962738456053e-07, "logits/chosen": -2.539135217666626, "logits/rejected": -2.600917100906372, "logps/chosen": -190.87591552734375, "logps/rejected": -362.54400634765625, "loss": 0.081, "rewards/accuracies": 1.0, "rewards/chosen": -0.18095922470092773, "rewards/margins": 11.730324745178223, "rewards/rejected": -11.911282539367676, "step": 8200 }, { "epoch": 1.98, "learning_rate": 1.896505615974327e-07, "logits/chosen": -2.567045211791992, "logits/rejected": -2.5732991695404053, "logps/chosen": -203.4150390625, "logps/rejected": -401.07196044921875, "loss": 0.0967, "rewards/accuracies": 1.0, "rewards/chosen": 0.10424991697072983, "rewards/margins": 10.450787544250488, "rewards/rejected": -10.346536636352539, "step": 8210 }, { "epoch": 1.98, "learning_rate": 1.8920484934926013e-07, "logits/chosen": -2.520174980163574, "logits/rejected": -2.498842716217041, "logps/chosen": -187.52316284179688, "logps/rejected": -244.5631866455078, "loss": 0.0831, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6810253858566284, "rewards/margins": 5.883547782897949, "rewards/rejected": -6.564573764801025, "step": 8220 }, { "epoch": 1.98, "learning_rate": 1.8875913710108753e-07, "logits/chosen": -2.5760746002197266, "logits/rejected": -2.4093546867370605, "logps/chosen": -254.4566650390625, "logps/rejected": -239.0888214111328, "loss": 0.1016, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.9175148010253906, "rewards/margins": 6.227782249450684, "rewards/rejected": -8.145296096801758, "step": 8230 }, { "epoch": 1.98, "learning_rate": 1.8831342485291496e-07, "logits/chosen": -2.740858793258667, "logits/rejected": -2.6939857006073, "logps/chosen": -241.9950714111328, "logps/rejected": -368.31597900390625, "loss": 0.0649, "rewards/accuracies": 1.0, "rewards/chosen": 0.09666530042886734, "rewards/margins": 9.49067497253418, "rewards/rejected": -9.39400863647461, "step": 8240 }, { "epoch": 1.99, "learning_rate": 1.878677126047424e-07, "logits/chosen": -2.4791433811187744, "logits/rejected": -2.488866090774536, "logps/chosen": -301.0170593261719, "logps/rejected": -308.7914733886719, "loss": 0.1834, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1338343620300293, "rewards/margins": 10.202335357666016, "rewards/rejected": -10.336170196533203, "step": 8250 }, { "epoch": 1.99, "learning_rate": 1.874220003565698e-07, "logits/chosen": -2.692753314971924, "logits/rejected": -2.6837029457092285, "logps/chosen": -295.8060302734375, "logps/rejected": -336.2331848144531, "loss": 0.0899, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.5804893970489502, "rewards/margins": 8.181416511535645, "rewards/rejected": -9.761906623840332, "step": 8260 }, { "epoch": 1.99, "learning_rate": 1.8697628810839722e-07, "logits/chosen": -2.711230993270874, "logits/rejected": -2.5777366161346436, "logps/chosen": -431.20703125, "logps/rejected": -328.12554931640625, "loss": 0.0866, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.21979090571403503, "rewards/margins": 8.251157760620117, "rewards/rejected": -8.031366348266602, "step": 8270 }, { "epoch": 1.99, "learning_rate": 1.8653057586022465e-07, "logits/chosen": -2.6268134117126465, "logits/rejected": -2.579261064529419, "logps/chosen": -268.69842529296875, "logps/rejected": -335.99249267578125, "loss": 0.0869, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.1315809488296509, "rewards/margins": 9.498881340026855, "rewards/rejected": -8.367301940917969, "step": 8280 }, { "epoch": 2.0, "learning_rate": 1.8608486361205205e-07, "logits/chosen": -2.5360658168792725, "logits/rejected": -2.4947776794433594, "logps/chosen": -269.78399658203125, "logps/rejected": -336.0529479980469, "loss": 0.1276, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.6951628923416138, "rewards/margins": 8.493033409118652, "rewards/rejected": -9.188196182250977, "step": 8290 }, { "epoch": 2.0, "learning_rate": 1.8563915136387948e-07, "logits/chosen": -2.577726125717163, "logits/rejected": -2.466395616531372, "logps/chosen": -268.5062561035156, "logps/rejected": -222.9132843017578, "loss": 0.0845, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9850286245346069, "rewards/margins": 5.597483158111572, "rewards/rejected": -6.582511901855469, "step": 8300 }, { "epoch": 2.0, "learning_rate": 1.8519343911570688e-07, "logits/chosen": -2.3435440063476562, "logits/rejected": -2.271286725997925, "logps/chosen": -223.3821563720703, "logps/rejected": -361.36309814453125, "loss": 0.1232, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.5280535221099854, "rewards/margins": 8.094234466552734, "rewards/rejected": -9.62228775024414, "step": 8310 }, { "epoch": 2.0, "learning_rate": 1.8474772686753431e-07, "logits/chosen": -2.581578493118286, "logits/rejected": -2.479897975921631, "logps/chosen": -245.2767791748047, "logps/rejected": -291.4203186035156, "loss": 0.0497, "rewards/accuracies": 1.0, "rewards/chosen": -0.8183133006095886, "rewards/margins": 8.31879711151123, "rewards/rejected": -9.137109756469727, "step": 8320 }, { "epoch": 2.0, "learning_rate": 1.8430201461936174e-07, "logits/chosen": -2.479945182800293, "logits/rejected": -2.5628583431243896, "logps/chosen": -193.89761352539062, "logps/rejected": -321.42535400390625, "loss": 0.0361, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5563122630119324, "rewards/margins": 8.874561309814453, "rewards/rejected": -9.43087387084961, "step": 8330 }, { "epoch": 2.01, "learning_rate": 1.8385630237118915e-07, "logits/chosen": -2.727910041809082, "logits/rejected": -2.6726880073547363, "logps/chosen": -283.5306701660156, "logps/rejected": -349.61822509765625, "loss": 0.0322, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.885776698589325, "rewards/margins": 9.441603660583496, "rewards/rejected": -10.327380180358887, "step": 8340 }, { "epoch": 2.01, "learning_rate": 1.8341059012301658e-07, "logits/chosen": -2.425305128097534, "logits/rejected": -2.476073980331421, "logps/chosen": -293.70501708984375, "logps/rejected": -441.30548095703125, "loss": 0.0336, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2522985935211182, "rewards/margins": 9.488914489746094, "rewards/rejected": -10.741212844848633, "step": 8350 }, { "epoch": 2.01, "learning_rate": 1.82964877874844e-07, "logits/chosen": -2.480833053588867, "logits/rejected": -2.5968728065490723, "logps/chosen": -216.1871337890625, "logps/rejected": -294.4857482910156, "loss": 0.0282, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.41751059889793396, "rewards/margins": 7.914503574371338, "rewards/rejected": -8.332013130187988, "step": 8360 }, { "epoch": 2.01, "learning_rate": 1.825191656266714e-07, "logits/chosen": -2.640190601348877, "logits/rejected": -2.6222071647644043, "logps/chosen": -299.1702880859375, "logps/rejected": -288.4884033203125, "loss": 0.0416, "rewards/accuracies": 1.0, "rewards/chosen": -0.215740367770195, "rewards/margins": 8.871757507324219, "rewards/rejected": -9.087498664855957, "step": 8370 }, { "epoch": 2.02, "learning_rate": 1.8207345337849884e-07, "logits/chosen": -2.568044424057007, "logits/rejected": -2.587066411972046, "logps/chosen": -250.3285675048828, "logps/rejected": -306.30609130859375, "loss": 0.0412, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0288197994232178, "rewards/margins": 8.337857246398926, "rewards/rejected": -9.366676330566406, "step": 8380 }, { "epoch": 2.02, "learning_rate": 1.8162774113032624e-07, "logits/chosen": -2.292970657348633, "logits/rejected": -2.2049508094787598, "logps/chosen": -251.81686401367188, "logps/rejected": -368.78204345703125, "loss": 0.0376, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1272547245025635, "rewards/margins": 9.117366790771484, "rewards/rejected": -10.244623184204102, "step": 8390 }, { "epoch": 2.02, "learning_rate": 1.8118202888215367e-07, "logits/chosen": -2.6245510578155518, "logits/rejected": -2.570723295211792, "logps/chosen": -314.4761047363281, "logps/rejected": -315.69873046875, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": -1.1873103380203247, "rewards/margins": 7.634018898010254, "rewards/rejected": -8.821329116821289, "step": 8400 }, { "epoch": 2.02, "eval_logits/chosen": -2.321371555328369, "eval_logits/rejected": -2.2849819660186768, "eval_logps/chosen": -251.5195770263672, "eval_logps/rejected": -271.35797119140625, "eval_loss": 0.5478196740150452, "eval_rewards/accuracies": 0.6800000071525574, "eval_rewards/chosen": -4.959201335906982, "eval_rewards/margins": 3.2235164642333984, "eval_rewards/rejected": -8.182718276977539, "eval_runtime": 133.6443, "eval_samples_per_second": 23.615, "eval_steps_per_second": 0.374, "step": 8400 }, { "epoch": 2.02, "learning_rate": 1.807363166339811e-07, "logits/chosen": -2.615668535232544, "logits/rejected": -2.552216053009033, "logps/chosen": -241.84475708007812, "logps/rejected": -424.7079162597656, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -1.0549558401107788, "rewards/margins": 10.097718238830566, "rewards/rejected": -11.152674674987793, "step": 8410 }, { "epoch": 2.03, "learning_rate": 1.802906043858085e-07, "logits/chosen": -2.521404266357422, "logits/rejected": -2.5072107315063477, "logps/chosen": -238.27090454101562, "logps/rejected": -330.8056640625, "loss": 0.0551, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7115432024002075, "rewards/margins": 9.558634757995605, "rewards/rejected": -10.270176887512207, "step": 8420 }, { "epoch": 2.03, "learning_rate": 1.7984489213763593e-07, "logits/chosen": -2.6884095668792725, "logits/rejected": -2.680422306060791, "logps/chosen": -277.35272216796875, "logps/rejected": -377.1102600097656, "loss": 0.0753, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.961971640586853, "rewards/margins": 10.981986999511719, "rewards/rejected": -11.943960189819336, "step": 8430 }, { "epoch": 2.03, "learning_rate": 1.7939917988946336e-07, "logits/chosen": -2.333134174346924, "logits/rejected": -2.346411943435669, "logps/chosen": -241.6659698486328, "logps/rejected": -254.75167846679688, "loss": 0.0297, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6574668884277344, "rewards/margins": 7.7597527503967285, "rewards/rejected": -9.417219161987305, "step": 8440 }, { "epoch": 2.03, "learning_rate": 1.7895346764129076e-07, "logits/chosen": -2.623356342315674, "logits/rejected": -2.467191696166992, "logps/chosen": -314.97674560546875, "logps/rejected": -365.6291809082031, "loss": 0.0253, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.3737142086029053, "rewards/margins": 11.685809135437012, "rewards/rejected": -10.312093734741211, "step": 8450 }, { "epoch": 2.04, "learning_rate": 1.785077553931182e-07, "logits/chosen": -2.6209094524383545, "logits/rejected": -2.5927417278289795, "logps/chosen": -237.51516723632812, "logps/rejected": -387.4162292480469, "loss": 0.0216, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.13859105110168457, "rewards/margins": 10.451630592346191, "rewards/rejected": -10.590221405029297, "step": 8460 }, { "epoch": 2.04, "learning_rate": 1.780620431449456e-07, "logits/chosen": -2.4766571521759033, "logits/rejected": -2.3246235847473145, "logps/chosen": -221.49423217773438, "logps/rejected": -314.0611267089844, "loss": 0.0348, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.025004005059599876, "rewards/margins": 11.23481559753418, "rewards/rejected": -11.209811210632324, "step": 8470 }, { "epoch": 2.04, "learning_rate": 1.7761633089677302e-07, "logits/chosen": -2.6109936237335205, "logits/rejected": -2.602966785430908, "logps/chosen": -266.33184814453125, "logps/rejected": -320.2018127441406, "loss": 0.0314, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.632354736328125, "rewards/margins": 8.30778980255127, "rewards/rejected": -9.940142631530762, "step": 8480 }, { "epoch": 2.04, "learning_rate": 1.7717061864860045e-07, "logits/chosen": -2.709364891052246, "logits/rejected": -2.57318377494812, "logps/chosen": -276.8520202636719, "logps/rejected": -289.3186950683594, "loss": 0.0293, "rewards/accuracies": 1.0, "rewards/chosen": 0.3175122141838074, "rewards/margins": 9.372842788696289, "rewards/rejected": -9.055330276489258, "step": 8490 }, { "epoch": 2.05, "learning_rate": 1.7672490640042786e-07, "logits/chosen": -2.52427339553833, "logits/rejected": -2.4743118286132812, "logps/chosen": -314.8677673339844, "logps/rejected": -344.07421875, "loss": 0.0234, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6964309215545654, "rewards/margins": 10.262825012207031, "rewards/rejected": -11.959256172180176, "step": 8500 }, { "epoch": 2.05, "learning_rate": 1.7627919415225529e-07, "logits/chosen": -2.570406913757324, "logits/rejected": -2.551332950592041, "logps/chosen": -209.91061401367188, "logps/rejected": -333.5925598144531, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": -0.007298028562217951, "rewards/margins": 13.005849838256836, "rewards/rejected": -13.013150215148926, "step": 8510 }, { "epoch": 2.05, "learning_rate": 1.7583348190408272e-07, "logits/chosen": -2.222724437713623, "logits/rejected": -2.300708293914795, "logps/chosen": -190.5357666015625, "logps/rejected": -275.17681884765625, "loss": 0.0335, "rewards/accuracies": 1.0, "rewards/chosen": 0.722133219242096, "rewards/margins": 11.837811470031738, "rewards/rejected": -11.115676879882812, "step": 8520 }, { "epoch": 2.05, "learning_rate": 1.7538776965591012e-07, "logits/chosen": -2.618381977081299, "logits/rejected": -2.480579137802124, "logps/chosen": -226.8935546875, "logps/rejected": -307.2865295410156, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -1.6915401220321655, "rewards/margins": 9.894481658935547, "rewards/rejected": -11.586021423339844, "step": 8530 }, { "epoch": 2.06, "learning_rate": 1.7494205740773757e-07, "logits/chosen": -2.4632339477539062, "logits/rejected": -2.4606950283050537, "logps/chosen": -268.89569091796875, "logps/rejected": -369.653564453125, "loss": 0.0414, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.4387400150299072, "rewards/margins": 9.543800354003906, "rewards/rejected": -11.982542037963867, "step": 8540 }, { "epoch": 2.06, "learning_rate": 1.7449634515956498e-07, "logits/chosen": -2.717900276184082, "logits/rejected": -2.6395182609558105, "logps/chosen": -285.09735107421875, "logps/rejected": -365.9479064941406, "loss": 0.0309, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6911836862564087, "rewards/margins": 10.700658798217773, "rewards/rejected": -11.39184284210205, "step": 8550 }, { "epoch": 2.06, "learning_rate": 1.740506329113924e-07, "logits/chosen": -2.54192852973938, "logits/rejected": -2.5281126499176025, "logps/chosen": -247.7664794921875, "logps/rejected": -295.73883056640625, "loss": 0.0507, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.8046247959136963, "rewards/margins": 8.388303756713867, "rewards/rejected": -10.192930221557617, "step": 8560 }, { "epoch": 2.06, "learning_rate": 1.7360492066321984e-07, "logits/chosen": -2.619554042816162, "logits/rejected": -2.523376703262329, "logps/chosen": -298.53448486328125, "logps/rejected": -338.39825439453125, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": -0.2122616469860077, "rewards/margins": 10.543150901794434, "rewards/rejected": -10.755414009094238, "step": 8570 }, { "epoch": 2.06, "learning_rate": 1.7315920841504724e-07, "logits/chosen": -2.56117582321167, "logits/rejected": -2.4610273838043213, "logps/chosen": -262.32806396484375, "logps/rejected": -287.43170166015625, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.9256149530410767, "rewards/margins": 8.507223129272461, "rewards/rejected": -9.432836532592773, "step": 8580 }, { "epoch": 2.07, "learning_rate": 1.7271349616687467e-07, "logits/chosen": -2.2090346813201904, "logits/rejected": -2.18257737159729, "logps/chosen": -241.55361938476562, "logps/rejected": -426.50323486328125, "loss": 0.0443, "rewards/accuracies": 1.0, "rewards/chosen": 0.20222394168376923, "rewards/margins": 12.990568161010742, "rewards/rejected": -12.788345336914062, "step": 8590 }, { "epoch": 2.07, "learning_rate": 1.722677839187021e-07, "logits/chosen": -2.5544230937957764, "logits/rejected": -2.4997313022613525, "logps/chosen": -254.62548828125, "logps/rejected": -347.44830322265625, "loss": 0.0162, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7101238965988159, "rewards/margins": 11.792463302612305, "rewards/rejected": -12.502588272094727, "step": 8600 }, { "epoch": 2.07, "learning_rate": 1.718220716705295e-07, "logits/chosen": -2.552060127258301, "logits/rejected": -2.5133891105651855, "logps/chosen": -241.55874633789062, "logps/rejected": -329.78887939453125, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": 0.0036016940139234066, "rewards/margins": 11.071989059448242, "rewards/rejected": -11.068387031555176, "step": 8610 }, { "epoch": 2.07, "learning_rate": 1.7137635942235693e-07, "logits/chosen": -2.548405170440674, "logits/rejected": -2.525132656097412, "logps/chosen": -299.3553771972656, "logps/rejected": -380.62060546875, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -1.139615774154663, "rewards/margins": 12.482218742370605, "rewards/rejected": -13.621835708618164, "step": 8620 }, { "epoch": 2.08, "learning_rate": 1.7093064717418433e-07, "logits/chosen": -2.564290761947632, "logits/rejected": -2.458705425262451, "logps/chosen": -258.10638427734375, "logps/rejected": -362.2889404296875, "loss": 0.0259, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.686469554901123, "rewards/margins": 12.353072166442871, "rewards/rejected": -13.039543151855469, "step": 8630 }, { "epoch": 2.08, "learning_rate": 1.7048493492601176e-07, "logits/chosen": -2.5656790733337402, "logits/rejected": -2.448558807373047, "logps/chosen": -276.85089111328125, "logps/rejected": -294.8529968261719, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8730595707893372, "rewards/margins": 10.170073509216309, "rewards/rejected": -11.043131828308105, "step": 8640 }, { "epoch": 2.08, "learning_rate": 1.700392226778392e-07, "logits/chosen": -2.4552650451660156, "logits/rejected": -2.6275129318237305, "logps/chosen": -256.6981506347656, "logps/rejected": -329.18109130859375, "loss": 0.0089, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.57915198802948, "rewards/margins": 10.759608268737793, "rewards/rejected": -12.338760375976562, "step": 8650 }, { "epoch": 2.08, "learning_rate": 1.695935104296666e-07, "logits/chosen": -2.71759033203125, "logits/rejected": -2.5608346462249756, "logps/chosen": -354.6686096191406, "logps/rejected": -412.43792724609375, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": 0.8514503240585327, "rewards/margins": 11.856977462768555, "rewards/rejected": -11.00552749633789, "step": 8660 }, { "epoch": 2.09, "learning_rate": 1.6914779818149402e-07, "logits/chosen": -2.5128674507141113, "logits/rejected": -2.454211711883545, "logps/chosen": -361.24237060546875, "logps/rejected": -447.9710998535156, "loss": 0.0392, "rewards/accuracies": 1.0, "rewards/chosen": 0.2542957663536072, "rewards/margins": 11.777315139770508, "rewards/rejected": -11.523018836975098, "step": 8670 }, { "epoch": 2.09, "learning_rate": 1.6870208593332145e-07, "logits/chosen": -2.7541584968566895, "logits/rejected": -2.6328043937683105, "logps/chosen": -364.6342468261719, "logps/rejected": -389.0901794433594, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -0.827517032623291, "rewards/margins": 11.085371971130371, "rewards/rejected": -11.91288948059082, "step": 8680 }, { "epoch": 2.09, "learning_rate": 1.6825637368514886e-07, "logits/chosen": -2.535529375076294, "logits/rejected": -2.356951951980591, "logps/chosen": -330.3153381347656, "logps/rejected": -280.31146240234375, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": -0.03817791864275932, "rewards/margins": 10.486616134643555, "rewards/rejected": -10.52479362487793, "step": 8690 }, { "epoch": 2.09, "learning_rate": 1.6781066143697628e-07, "logits/chosen": -2.3027021884918213, "logits/rejected": -2.4584460258483887, "logps/chosen": -214.76657104492188, "logps/rejected": -349.4528503417969, "loss": 0.0642, "rewards/accuracies": 1.0, "rewards/chosen": -1.9708747863769531, "rewards/margins": 11.003325462341309, "rewards/rejected": -12.974202156066895, "step": 8700 }, { "epoch": 2.1, "learning_rate": 1.673649491888037e-07, "logits/chosen": -2.6496386528015137, "logits/rejected": -2.502171277999878, "logps/chosen": -388.9115295410156, "logps/rejected": -360.45062255859375, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/chosen": -0.633139431476593, "rewards/margins": 10.832531929016113, "rewards/rejected": -11.465669631958008, "step": 8710 }, { "epoch": 2.1, "learning_rate": 1.6691923694063112e-07, "logits/chosen": -2.3688206672668457, "logits/rejected": -2.2172188758850098, "logps/chosen": -245.2628631591797, "logps/rejected": -304.0785217285156, "loss": 0.02, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.1016969680786133, "rewards/margins": 9.271982192993164, "rewards/rejected": -11.373678207397461, "step": 8720 }, { "epoch": 2.1, "learning_rate": 1.6647352469245855e-07, "logits/chosen": -2.4992682933807373, "logits/rejected": -2.3491616249084473, "logps/chosen": -293.691650390625, "logps/rejected": -324.7965393066406, "loss": 0.0352, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.12693621218204498, "rewards/margins": 9.295028686523438, "rewards/rejected": -9.421964645385742, "step": 8730 }, { "epoch": 2.1, "learning_rate": 1.6602781244428595e-07, "logits/chosen": -2.470156192779541, "logits/rejected": -2.3872666358947754, "logps/chosen": -224.85061645507812, "logps/rejected": -311.27691650390625, "loss": 0.0443, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.5656036138534546, "rewards/margins": 9.098166465759277, "rewards/rejected": -8.532563209533691, "step": 8740 }, { "epoch": 2.11, "learning_rate": 1.6558210019611338e-07, "logits/chosen": -2.6150567531585693, "logits/rejected": -2.597041130065918, "logps/chosen": -333.84515380859375, "logps/rejected": -379.3749084472656, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011082172859460115, "rewards/margins": 11.748732566833496, "rewards/rejected": -11.749841690063477, "step": 8750 }, { "epoch": 2.11, "learning_rate": 1.651363879479408e-07, "logits/chosen": -2.4506280422210693, "logits/rejected": -2.3053932189941406, "logps/chosen": -224.02517700195312, "logps/rejected": -269.5003356933594, "loss": 0.0515, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.8991334438323975, "rewards/margins": 7.669316291809082, "rewards/rejected": -9.568449974060059, "step": 8760 }, { "epoch": 2.11, "learning_rate": 1.646906756997682e-07, "logits/chosen": -2.623814344406128, "logits/rejected": -2.5576224327087402, "logps/chosen": -396.124267578125, "logps/rejected": -358.0867919921875, "loss": 0.0279, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.060526300221681595, "rewards/margins": 8.95967960357666, "rewards/rejected": -9.0202054977417, "step": 8770 }, { "epoch": 2.11, "learning_rate": 1.6424496345159564e-07, "logits/chosen": -2.505446672439575, "logits/rejected": -2.4441895484924316, "logps/chosen": -311.7078857421875, "logps/rejected": -399.08648681640625, "loss": 0.034, "rewards/accuracies": 1.0, "rewards/chosen": -3.4337737560272217, "rewards/margins": 12.172263145446777, "rewards/rejected": -15.606036186218262, "step": 8780 }, { "epoch": 2.12, "learning_rate": 1.6379925120342304e-07, "logits/chosen": -2.5672106742858887, "logits/rejected": -2.4547407627105713, "logps/chosen": -197.42898559570312, "logps/rejected": -345.4898376464844, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": -2.4426276683807373, "rewards/margins": 10.940542221069336, "rewards/rejected": -13.383171081542969, "step": 8790 }, { "epoch": 2.12, "learning_rate": 1.6335353895525047e-07, "logits/chosen": -2.58722186088562, "logits/rejected": -2.413419246673584, "logps/chosen": -265.8213195800781, "logps/rejected": -407.5599060058594, "loss": 0.0401, "rewards/accuracies": 1.0, "rewards/chosen": 0.5724204778671265, "rewards/margins": 12.546555519104004, "rewards/rejected": -11.97413444519043, "step": 8800 }, { "epoch": 2.12, "eval_logits/chosen": -2.2326250076293945, "eval_logits/rejected": -2.192549228668213, "eval_logps/chosen": -263.7903747558594, "eval_logps/rejected": -290.16241455078125, "eval_loss": 0.5999376177787781, "eval_rewards/accuracies": 0.6800000071525574, "eval_rewards/chosen": -6.186283588409424, "eval_rewards/margins": 3.8768796920776367, "eval_rewards/rejected": -10.063161849975586, "eval_runtime": 133.9581, "eval_samples_per_second": 23.56, "eval_steps_per_second": 0.373, "step": 8800 }, { "epoch": 2.12, "learning_rate": 1.629078267070779e-07, "logits/chosen": -2.5198941230773926, "logits/rejected": -2.3723132610321045, "logps/chosen": -258.646240234375, "logps/rejected": -290.8374938964844, "loss": 0.0229, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.203172445297241, "rewards/margins": 7.861981391906738, "rewards/rejected": -10.065153121948242, "step": 8810 }, { "epoch": 2.12, "learning_rate": 1.624621144589053e-07, "logits/chosen": -2.5503334999084473, "logits/rejected": -2.5767085552215576, "logps/chosen": -234.769775390625, "logps/rejected": -370.20281982421875, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/chosen": -1.087836742401123, "rewards/margins": 10.581189155578613, "rewards/rejected": -11.669024467468262, "step": 8820 }, { "epoch": 2.13, "learning_rate": 1.6201640221073273e-07, "logits/chosen": -2.6861696243286133, "logits/rejected": -2.4844136238098145, "logps/chosen": -285.65020751953125, "logps/rejected": -351.6511535644531, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": 0.32659345865249634, "rewards/margins": 11.284768104553223, "rewards/rejected": -10.958174705505371, "step": 8830 }, { "epoch": 2.13, "learning_rate": 1.6157068996256016e-07, "logits/chosen": -2.508152484893799, "logits/rejected": -2.475700855255127, "logps/chosen": -244.31350708007812, "logps/rejected": -310.83624267578125, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/chosen": 0.22390878200531006, "rewards/margins": 10.004617691040039, "rewards/rejected": -9.780708312988281, "step": 8840 }, { "epoch": 2.13, "learning_rate": 1.6112497771438757e-07, "logits/chosen": -2.5587594509124756, "logits/rejected": -2.4645655155181885, "logps/chosen": -262.1879577636719, "logps/rejected": -341.3424377441406, "loss": 0.0359, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.997057318687439, "rewards/margins": 8.987771034240723, "rewards/rejected": -10.984827041625977, "step": 8850 }, { "epoch": 2.13, "learning_rate": 1.60679265466215e-07, "logits/chosen": -2.526792049407959, "logits/rejected": -2.438915967941284, "logps/chosen": -366.299560546875, "logps/rejected": -384.32452392578125, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": 1.2667930126190186, "rewards/margins": 14.111457824707031, "rewards/rejected": -12.84466552734375, "step": 8860 }, { "epoch": 2.13, "learning_rate": 1.602335532180424e-07, "logits/chosen": -2.499552011489868, "logits/rejected": -2.5095345973968506, "logps/chosen": -297.86346435546875, "logps/rejected": -385.1635437011719, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -0.30837756395339966, "rewards/margins": 11.472579002380371, "rewards/rejected": -11.780957221984863, "step": 8870 }, { "epoch": 2.14, "learning_rate": 1.5978784096986985e-07, "logits/chosen": -2.3323168754577637, "logits/rejected": -2.251751661300659, "logps/chosen": -308.49676513671875, "logps/rejected": -427.1435546875, "loss": 0.0341, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.2195223569869995, "rewards/margins": 14.615882873535156, "rewards/rejected": -15.835405349731445, "step": 8880 }, { "epoch": 2.14, "learning_rate": 1.5934212872169728e-07, "logits/chosen": -2.4065375328063965, "logits/rejected": -2.4848408699035645, "logps/chosen": -292.5906982421875, "logps/rejected": -367.9299011230469, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -0.9609495401382446, "rewards/margins": 10.981375694274902, "rewards/rejected": -11.942326545715332, "step": 8890 }, { "epoch": 2.14, "learning_rate": 1.5889641647352469e-07, "logits/chosen": -2.580648183822632, "logits/rejected": -2.5684657096862793, "logps/chosen": -402.7144775390625, "logps/rejected": -552.5977783203125, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": 0.04434077814221382, "rewards/margins": 14.231958389282227, "rewards/rejected": -14.187617301940918, "step": 8900 }, { "epoch": 2.14, "learning_rate": 1.5845070422535212e-07, "logits/chosen": -2.428067922592163, "logits/rejected": -2.332951068878174, "logps/chosen": -314.9877014160156, "logps/rejected": -423.8435974121094, "loss": 0.042, "rewards/accuracies": 1.0, "rewards/chosen": -1.3378690481185913, "rewards/margins": 12.019502639770508, "rewards/rejected": -13.357370376586914, "step": 8910 }, { "epoch": 2.15, "learning_rate": 1.5800499197717954e-07, "logits/chosen": -2.5505106449127197, "logits/rejected": -2.5178935527801514, "logps/chosen": -361.66827392578125, "logps/rejected": -529.9429931640625, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -2.1555678844451904, "rewards/margins": 14.764144897460938, "rewards/rejected": -16.91971206665039, "step": 8920 }, { "epoch": 2.15, "learning_rate": 1.5755927972900695e-07, "logits/chosen": -2.3408703804016113, "logits/rejected": -2.298884153366089, "logps/chosen": -280.03021240234375, "logps/rejected": -350.9729919433594, "loss": 0.0227, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.21215252578258514, "rewards/margins": 13.304191589355469, "rewards/rejected": -13.516342163085938, "step": 8930 }, { "epoch": 2.15, "learning_rate": 1.5711356748083438e-07, "logits/chosen": -2.6067442893981934, "logits/rejected": -2.525693416595459, "logps/chosen": -270.82916259765625, "logps/rejected": -361.995849609375, "loss": 0.0336, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.351936101913452, "rewards/margins": 11.557195663452148, "rewards/rejected": -13.90913200378418, "step": 8940 }, { "epoch": 2.15, "learning_rate": 1.566678552326618e-07, "logits/chosen": -2.5198090076446533, "logits/rejected": -2.43400239944458, "logps/chosen": -255.52359008789062, "logps/rejected": -274.2308349609375, "loss": 0.0428, "rewards/accuracies": 1.0, "rewards/chosen": -0.012747669592499733, "rewards/margins": 11.146052360534668, "rewards/rejected": -11.15880012512207, "step": 8950 }, { "epoch": 2.16, "learning_rate": 1.562221429844892e-07, "logits/chosen": -2.5320210456848145, "logits/rejected": -2.183725118637085, "logps/chosen": -272.8175048828125, "logps/rejected": -276.9789733886719, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -0.9705953598022461, "rewards/margins": 11.81946086883545, "rewards/rejected": -12.790057182312012, "step": 8960 }, { "epoch": 2.16, "learning_rate": 1.5577643073631664e-07, "logits/chosen": -2.348020076751709, "logits/rejected": -2.427987575531006, "logps/chosen": -219.4339141845703, "logps/rejected": -344.757568359375, "loss": 0.0338, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.443242073059082, "rewards/margins": 10.88129997253418, "rewards/rejected": -12.324542045593262, "step": 8970 }, { "epoch": 2.16, "learning_rate": 1.5533071848814404e-07, "logits/chosen": -2.473099946975708, "logits/rejected": -2.4175493717193604, "logps/chosen": -263.54644775390625, "logps/rejected": -292.1980895996094, "loss": 0.032, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.9177930355072021, "rewards/margins": 9.586542129516602, "rewards/rejected": -11.504335403442383, "step": 8980 }, { "epoch": 2.16, "learning_rate": 1.5488500623997147e-07, "logits/chosen": -2.281341314315796, "logits/rejected": -2.402763843536377, "logps/chosen": -240.7819366455078, "logps/rejected": -395.9696960449219, "loss": 0.0214, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.6088535785675049, "rewards/margins": 10.593988418579102, "rewards/rejected": -12.202842712402344, "step": 8990 }, { "epoch": 2.17, "learning_rate": 1.544392939917989e-07, "logits/chosen": -2.353468179702759, "logits/rejected": -2.2845921516418457, "logps/chosen": -203.794921875, "logps/rejected": -358.76873779296875, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -1.8850176334381104, "rewards/margins": 11.59618854522705, "rewards/rejected": -13.481205940246582, "step": 9000 }, { "epoch": 2.17, "learning_rate": 1.539935817436263e-07, "logits/chosen": -2.5114352703094482, "logits/rejected": -2.4373786449432373, "logps/chosen": -254.0144805908203, "logps/rejected": -368.44915771484375, "loss": 0.0518, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6289116144180298, "rewards/margins": 11.79938793182373, "rewards/rejected": -12.428298950195312, "step": 9010 }, { "epoch": 2.17, "learning_rate": 1.5354786949545373e-07, "logits/chosen": -2.257683753967285, "logits/rejected": -2.271179676055908, "logps/chosen": -265.44110107421875, "logps/rejected": -345.773193359375, "loss": 0.0373, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7293133735656738, "rewards/margins": 9.583789825439453, "rewards/rejected": -10.313103675842285, "step": 9020 }, { "epoch": 2.17, "learning_rate": 1.5310215724728116e-07, "logits/chosen": -2.4884238243103027, "logits/rejected": -2.3828537464141846, "logps/chosen": -342.45391845703125, "logps/rejected": -368.4088134765625, "loss": 0.0346, "rewards/accuracies": 1.0, "rewards/chosen": -0.9505392909049988, "rewards/margins": 10.661983489990234, "rewards/rejected": -11.612524032592773, "step": 9030 }, { "epoch": 2.18, "learning_rate": 1.5265644499910856e-07, "logits/chosen": -2.3377490043640137, "logits/rejected": -2.173964023590088, "logps/chosen": -238.179931640625, "logps/rejected": -385.8675537109375, "loss": 0.0367, "rewards/accuracies": 1.0, "rewards/chosen": -0.6198335886001587, "rewards/margins": 11.473251342773438, "rewards/rejected": -12.093084335327148, "step": 9040 }, { "epoch": 2.18, "learning_rate": 1.52210732750936e-07, "logits/chosen": -2.2793710231781006, "logits/rejected": -2.3451266288757324, "logps/chosen": -192.71934509277344, "logps/rejected": -380.89825439453125, "loss": 0.027, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.118816375732422, "rewards/margins": 10.546943664550781, "rewards/rejected": -12.66576099395752, "step": 9050 }, { "epoch": 2.18, "learning_rate": 1.517650205027634e-07, "logits/chosen": -2.259404420852661, "logits/rejected": -2.238851308822632, "logps/chosen": -297.0094299316406, "logps/rejected": -463.2301330566406, "loss": 0.0187, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.126497268676758, "rewards/margins": 14.385139465332031, "rewards/rejected": -16.511638641357422, "step": 9060 }, { "epoch": 2.18, "learning_rate": 1.5131930825459083e-07, "logits/chosen": -2.097804546356201, "logits/rejected": -2.201899528503418, "logps/chosen": -254.0164794921875, "logps/rejected": -321.0382385253906, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": -0.39512887597084045, "rewards/margins": 11.358099937438965, "rewards/rejected": -11.753230094909668, "step": 9070 }, { "epoch": 2.19, "learning_rate": 1.5087359600641826e-07, "logits/chosen": -2.2721550464630127, "logits/rejected": -2.36177396774292, "logps/chosen": -297.96405029296875, "logps/rejected": -514.7808837890625, "loss": 0.0302, "rewards/accuracies": 1.0, "rewards/chosen": 1.2175419330596924, "rewards/margins": 18.318937301635742, "rewards/rejected": -17.101394653320312, "step": 9080 }, { "epoch": 2.19, "learning_rate": 1.5042788375824566e-07, "logits/chosen": -2.545522928237915, "logits/rejected": -2.2234930992126465, "logps/chosen": -256.73394775390625, "logps/rejected": -322.40142822265625, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": 0.07155473530292511, "rewards/margins": 11.270914077758789, "rewards/rejected": -11.199357986450195, "step": 9090 }, { "epoch": 2.19, "learning_rate": 1.499821715100731e-07, "logits/chosen": -2.3779549598693848, "logits/rejected": -2.292271375656128, "logps/chosen": -313.8078308105469, "logps/rejected": -379.89447021484375, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -0.6782283782958984, "rewards/margins": 11.749468803405762, "rewards/rejected": -12.427698135375977, "step": 9100 }, { "epoch": 2.19, "learning_rate": 1.4953645926190052e-07, "logits/chosen": -1.9888923168182373, "logits/rejected": -1.8414385318756104, "logps/chosen": -267.9323425292969, "logps/rejected": -376.7801208496094, "loss": 0.0258, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.582380771636963, "rewards/margins": 10.754520416259766, "rewards/rejected": -13.33690071105957, "step": 9110 }, { "epoch": 2.19, "learning_rate": 1.4909074701372792e-07, "logits/chosen": -2.2754411697387695, "logits/rejected": -2.129660129547119, "logps/chosen": -286.35699462890625, "logps/rejected": -302.1578063964844, "loss": 0.0297, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5871866345405579, "rewards/margins": 12.948625564575195, "rewards/rejected": -13.535810470581055, "step": 9120 }, { "epoch": 2.2, "learning_rate": 1.4864503476555535e-07, "logits/chosen": -2.2403056621551514, "logits/rejected": -2.3537936210632324, "logps/chosen": -229.68936157226562, "logps/rejected": -356.7602233886719, "loss": 0.0394, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.8653552532196045, "rewards/margins": 10.975387573242188, "rewards/rejected": -12.840744018554688, "step": 9130 }, { "epoch": 2.2, "learning_rate": 1.4819932251738275e-07, "logits/chosen": -2.247201442718506, "logits/rejected": -2.318659782409668, "logps/chosen": -205.4511260986328, "logps/rejected": -338.76007080078125, "loss": 0.0324, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.45393362641334534, "rewards/margins": 12.517606735229492, "rewards/rejected": -12.971542358398438, "step": 9140 }, { "epoch": 2.2, "learning_rate": 1.4775361026921018e-07, "logits/chosen": -2.3011038303375244, "logits/rejected": -2.273911952972412, "logps/chosen": -243.8572235107422, "logps/rejected": -332.92230224609375, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -2.181957244873047, "rewards/margins": 11.115522384643555, "rewards/rejected": -13.297480583190918, "step": 9150 }, { "epoch": 2.2, "learning_rate": 1.473078980210376e-07, "logits/chosen": -2.3728251457214355, "logits/rejected": -2.3403146266937256, "logps/chosen": -302.58087158203125, "logps/rejected": -281.1512145996094, "loss": 0.0316, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.0829882621765137, "rewards/margins": 8.896230697631836, "rewards/rejected": -10.979219436645508, "step": 9160 }, { "epoch": 2.21, "learning_rate": 1.46862185772865e-07, "logits/chosen": -2.5427348613739014, "logits/rejected": -2.3668570518493652, "logps/chosen": -266.931884765625, "logps/rejected": -340.0614929199219, "loss": 0.0579, "rewards/accuracies": 1.0, "rewards/chosen": -0.6020795106887817, "rewards/margins": 11.8134183883667, "rewards/rejected": -12.415498733520508, "step": 9170 }, { "epoch": 2.21, "learning_rate": 1.4641647352469244e-07, "logits/chosen": -2.518648386001587, "logits/rejected": -2.428374767303467, "logps/chosen": -240.01028442382812, "logps/rejected": -456.9060974121094, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": 0.8487393260002136, "rewards/margins": 13.704450607299805, "rewards/rejected": -12.855712890625, "step": 9180 }, { "epoch": 2.21, "learning_rate": 1.4597076127651987e-07, "logits/chosen": -2.5084991455078125, "logits/rejected": -2.3670554161071777, "logps/chosen": -328.0556335449219, "logps/rejected": -345.2771911621094, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": 0.7515302300453186, "rewards/margins": 11.287810325622559, "rewards/rejected": -10.536279678344727, "step": 9190 }, { "epoch": 2.21, "learning_rate": 1.4552504902834727e-07, "logits/chosen": -2.4443435668945312, "logits/rejected": -2.267771005630493, "logps/chosen": -335.3075256347656, "logps/rejected": -304.65771484375, "loss": 0.0327, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6059294939041138, "rewards/margins": 10.79488468170166, "rewards/rejected": -11.400813102722168, "step": 9200 }, { "epoch": 2.21, "eval_logits/chosen": -2.174819231033325, "eval_logits/rejected": -2.1368966102600098, "eval_logps/chosen": -258.5181579589844, "eval_logps/rejected": -283.9364929199219, "eval_loss": 0.6189878582954407, "eval_rewards/accuracies": 0.6924999952316284, "eval_rewards/chosen": -5.659062385559082, "eval_rewards/margins": 3.781506299972534, "eval_rewards/rejected": -9.440567970275879, "eval_runtime": 134.5581, "eval_samples_per_second": 23.455, "eval_steps_per_second": 0.372, "step": 9200 }, { "epoch": 2.22, "learning_rate": 1.450793367801747e-07, "logits/chosen": -2.3690028190612793, "logits/rejected": -2.3312017917633057, "logps/chosen": -312.51177978515625, "logps/rejected": -377.66497802734375, "loss": 0.0456, "rewards/accuracies": 1.0, "rewards/chosen": -2.505537509918213, "rewards/margins": 11.056896209716797, "rewards/rejected": -13.562433242797852, "step": 9210 }, { "epoch": 2.22, "learning_rate": 1.4463362453200213e-07, "logits/chosen": -2.3457257747650146, "logits/rejected": -2.2673184871673584, "logps/chosen": -229.06680297851562, "logps/rejected": -268.76226806640625, "loss": 0.0524, "rewards/accuracies": 1.0, "rewards/chosen": -0.9529112577438354, "rewards/margins": 11.679685592651367, "rewards/rejected": -12.632596015930176, "step": 9220 }, { "epoch": 2.22, "learning_rate": 1.4418791228382956e-07, "logits/chosen": -2.6629931926727295, "logits/rejected": -2.4545934200286865, "logps/chosen": -278.45648193359375, "logps/rejected": -302.12237548828125, "loss": 0.0338, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7727594971656799, "rewards/margins": 9.006889343261719, "rewards/rejected": -9.77964973449707, "step": 9230 }, { "epoch": 2.22, "learning_rate": 1.43742200035657e-07, "logits/chosen": -2.4864726066589355, "logits/rejected": -2.534029245376587, "logps/chosen": -261.3238830566406, "logps/rejected": -354.3556213378906, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -1.2573440074920654, "rewards/margins": 11.578329086303711, "rewards/rejected": -12.835672378540039, "step": 9240 }, { "epoch": 2.23, "learning_rate": 1.432964877874844e-07, "logits/chosen": -2.523289203643799, "logits/rejected": -2.1942954063415527, "logps/chosen": -292.62457275390625, "logps/rejected": -314.6795959472656, "loss": 0.0425, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.6800389289855957, "rewards/margins": 10.989094734191895, "rewards/rejected": -13.669133186340332, "step": 9250 }, { "epoch": 2.23, "learning_rate": 1.4285077553931182e-07, "logits/chosen": -2.3437511920928955, "logits/rejected": -2.215064287185669, "logps/chosen": -241.7512664794922, "logps/rejected": -386.5013122558594, "loss": 0.032, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.681990146636963, "rewards/margins": 11.260754585266113, "rewards/rejected": -13.942744255065918, "step": 9260 }, { "epoch": 2.23, "learning_rate": 1.4240506329113925e-07, "logits/chosen": -2.45466947555542, "logits/rejected": -2.4046263694763184, "logps/chosen": -256.34759521484375, "logps/rejected": -310.4484558105469, "loss": 0.0384, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5057967901229858, "rewards/margins": 10.491201400756836, "rewards/rejected": -10.99699878692627, "step": 9270 }, { "epoch": 2.23, "learning_rate": 1.4195935104296666e-07, "logits/chosen": -2.4198827743530273, "logits/rejected": -2.445094585418701, "logps/chosen": -197.35574340820312, "logps/rejected": -301.61248779296875, "loss": 0.0289, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.170269012451172, "rewards/margins": 7.697892665863037, "rewards/rejected": -10.868162155151367, "step": 9280 }, { "epoch": 2.24, "learning_rate": 1.4151363879479409e-07, "logits/chosen": -2.3239498138427734, "logits/rejected": -2.255598783493042, "logps/chosen": -312.5681457519531, "logps/rejected": -319.4649963378906, "loss": 0.0298, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.643101453781128, "rewards/margins": 9.848406791687012, "rewards/rejected": -12.491508483886719, "step": 9290 }, { "epoch": 2.24, "learning_rate": 1.4106792654662152e-07, "logits/chosen": -2.5454611778259277, "logits/rejected": -2.4494545459747314, "logps/chosen": -295.7924499511719, "logps/rejected": -341.0743713378906, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": 0.5382768511772156, "rewards/margins": 12.340603828430176, "rewards/rejected": -11.802328109741211, "step": 9300 }, { "epoch": 2.24, "learning_rate": 1.4062221429844892e-07, "logits/chosen": -2.313345193862915, "logits/rejected": -2.2957451343536377, "logps/chosen": -271.76678466796875, "logps/rejected": -425.8828125, "loss": 0.0482, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.048810005187988, "rewards/margins": 9.32423210144043, "rewards/rejected": -13.373041152954102, "step": 9310 }, { "epoch": 2.24, "learning_rate": 1.4017650205027635e-07, "logits/chosen": -2.4318947792053223, "logits/rejected": -2.4367456436157227, "logps/chosen": -286.40216064453125, "logps/rejected": -350.0318298339844, "loss": 0.1116, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.9922993183135986, "rewards/margins": 9.691668510437012, "rewards/rejected": -11.683968544006348, "step": 9320 }, { "epoch": 2.25, "learning_rate": 1.3973078980210375e-07, "logits/chosen": -2.4391160011291504, "logits/rejected": -2.349377155303955, "logps/chosen": -263.29931640625, "logps/rejected": -358.6832580566406, "loss": 0.0213, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.355463981628418, "rewards/margins": 9.626498222351074, "rewards/rejected": -13.981962203979492, "step": 9330 }, { "epoch": 2.25, "learning_rate": 1.3928507755393118e-07, "logits/chosen": -2.3661608695983887, "logits/rejected": -2.504918336868286, "logps/chosen": -315.29913330078125, "logps/rejected": -415.14288330078125, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": -1.096774697303772, "rewards/margins": 12.709527969360352, "rewards/rejected": -13.806302070617676, "step": 9340 }, { "epoch": 2.25, "learning_rate": 1.388393653057586e-07, "logits/chosen": -2.566204309463501, "logits/rejected": -2.4146459102630615, "logps/chosen": -334.0547790527344, "logps/rejected": -343.410888671875, "loss": 0.0254, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7288995981216431, "rewards/margins": 9.186012268066406, "rewards/rejected": -9.914911270141602, "step": 9350 }, { "epoch": 2.25, "learning_rate": 1.38393653057586e-07, "logits/chosen": -2.3976540565490723, "logits/rejected": -2.289386510848999, "logps/chosen": -207.6011505126953, "logps/rejected": -277.0570373535156, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -0.43546366691589355, "rewards/margins": 10.367945671081543, "rewards/rejected": -10.8034086227417, "step": 9360 }, { "epoch": 2.26, "learning_rate": 1.3794794080941344e-07, "logits/chosen": -2.543123245239258, "logits/rejected": -2.4102225303649902, "logps/chosen": -307.92352294921875, "logps/rejected": -356.69488525390625, "loss": 0.0328, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2872573137283325, "rewards/margins": 11.00304126739502, "rewards/rejected": -12.290298461914062, "step": 9370 }, { "epoch": 2.26, "learning_rate": 1.3750222856124087e-07, "logits/chosen": -2.5676543712615967, "logits/rejected": -2.453909397125244, "logps/chosen": -293.49224853515625, "logps/rejected": -318.1600036621094, "loss": 0.0499, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7830958366394043, "rewards/margins": 11.916948318481445, "rewards/rejected": -12.700042724609375, "step": 9380 }, { "epoch": 2.26, "learning_rate": 1.3705651631306827e-07, "logits/chosen": -2.337442398071289, "logits/rejected": -2.2764225006103516, "logps/chosen": -198.6411895751953, "logps/rejected": -290.4046936035156, "loss": 0.0593, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.507361650466919, "rewards/margins": 11.443794250488281, "rewards/rejected": -12.951156616210938, "step": 9390 }, { "epoch": 2.26, "learning_rate": 1.366108040648957e-07, "logits/chosen": -2.4181835651397705, "logits/rejected": -2.457887887954712, "logps/chosen": -293.56829833984375, "logps/rejected": -410.14312744140625, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -0.3593311309814453, "rewards/margins": 12.338621139526367, "rewards/rejected": -12.697952270507812, "step": 9400 }, { "epoch": 2.26, "learning_rate": 1.361650918167231e-07, "logits/chosen": -2.2344985008239746, "logits/rejected": -2.2863659858703613, "logps/chosen": -278.97662353515625, "logps/rejected": -329.68560791015625, "loss": 0.1062, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.71694278717041, "rewards/margins": 8.286422729492188, "rewards/rejected": -11.003364562988281, "step": 9410 }, { "epoch": 2.27, "learning_rate": 1.3571937956855053e-07, "logits/chosen": -2.4888503551483154, "logits/rejected": -2.3015456199645996, "logps/chosen": -303.28668212890625, "logps/rejected": -330.32354736328125, "loss": 0.0319, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7734349966049194, "rewards/margins": 13.611085891723633, "rewards/rejected": -14.384519577026367, "step": 9420 }, { "epoch": 2.27, "learning_rate": 1.3527366732037796e-07, "logits/chosen": -2.4066035747528076, "logits/rejected": -2.43558931350708, "logps/chosen": -346.6483459472656, "logps/rejected": -445.6409606933594, "loss": 0.0216, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1004126071929932, "rewards/margins": 10.706197738647461, "rewards/rejected": -11.806611061096191, "step": 9430 }, { "epoch": 2.27, "learning_rate": 1.3482795507220537e-07, "logits/chosen": -2.3893866539001465, "logits/rejected": -2.4582602977752686, "logps/chosen": -265.438720703125, "logps/rejected": -428.552490234375, "loss": 0.0272, "rewards/accuracies": 1.0, "rewards/chosen": 1.036462664604187, "rewards/margins": 17.461559295654297, "rewards/rejected": -16.425098419189453, "step": 9440 }, { "epoch": 2.27, "learning_rate": 1.343822428240328e-07, "logits/chosen": -2.4005985260009766, "logits/rejected": -2.3538804054260254, "logps/chosen": -315.60589599609375, "logps/rejected": -392.16656494140625, "loss": 0.02, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.5265145301818848, "rewards/margins": 9.552511215209961, "rewards/rejected": -13.079025268554688, "step": 9450 }, { "epoch": 2.28, "learning_rate": 1.3393653057586023e-07, "logits/chosen": -2.221489906311035, "logits/rejected": -2.1872735023498535, "logps/chosen": -247.34375, "logps/rejected": -343.33453369140625, "loss": 0.0234, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.9642674922943115, "rewards/margins": 9.500177383422852, "rewards/rejected": -11.464445114135742, "step": 9460 }, { "epoch": 2.28, "learning_rate": 1.3349081832768763e-07, "logits/chosen": -2.3547425270080566, "logits/rejected": -2.3052620887756348, "logps/chosen": -292.5299377441406, "logps/rejected": -315.9531555175781, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -2.2172069549560547, "rewards/margins": 7.924900054931641, "rewards/rejected": -10.142107009887695, "step": 9470 }, { "epoch": 2.28, "learning_rate": 1.3304510607951506e-07, "logits/chosen": -2.4919469356536865, "logits/rejected": -2.4380953311920166, "logps/chosen": -262.9488830566406, "logps/rejected": -314.8113708496094, "loss": 0.0172, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8102928996086121, "rewards/margins": 11.47692584991455, "rewards/rejected": -12.28721809387207, "step": 9480 }, { "epoch": 2.28, "learning_rate": 1.3259939383134246e-07, "logits/chosen": -2.3655810356140137, "logits/rejected": -2.4084105491638184, "logps/chosen": -271.9031677246094, "logps/rejected": -357.02020263671875, "loss": 0.0452, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6531856060028076, "rewards/margins": 11.6923246383667, "rewards/rejected": -13.345510482788086, "step": 9490 }, { "epoch": 2.29, "learning_rate": 1.321536815831699e-07, "logits/chosen": -2.414926290512085, "logits/rejected": -2.4577858448028564, "logps/chosen": -236.73220825195312, "logps/rejected": -415.4325256347656, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": -0.3003663122653961, "rewards/margins": 12.758208274841309, "rewards/rejected": -13.058575630187988, "step": 9500 }, { "epoch": 2.29, "learning_rate": 1.3170796933499732e-07, "logits/chosen": -2.4245550632476807, "logits/rejected": -2.4358413219451904, "logps/chosen": -224.95639038085938, "logps/rejected": -442.0921325683594, "loss": 0.0293, "rewards/accuracies": 1.0, "rewards/chosen": -1.066308856010437, "rewards/margins": 12.269311904907227, "rewards/rejected": -13.335619926452637, "step": 9510 }, { "epoch": 2.29, "learning_rate": 1.3126225708682472e-07, "logits/chosen": -2.546781539916992, "logits/rejected": -2.5013813972473145, "logps/chosen": -240.187744140625, "logps/rejected": -337.92889404296875, "loss": 0.0321, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.5588526725769043, "rewards/margins": 10.870650291442871, "rewards/rejected": -13.42950439453125, "step": 9520 }, { "epoch": 2.29, "learning_rate": 1.3081654483865215e-07, "logits/chosen": -2.231473684310913, "logits/rejected": -2.2529168128967285, "logps/chosen": -230.21041870117188, "logps/rejected": -391.0644836425781, "loss": 0.0509, "rewards/accuracies": 1.0, "rewards/chosen": -2.672971487045288, "rewards/margins": 11.440793991088867, "rewards/rejected": -14.11376667022705, "step": 9530 }, { "epoch": 2.3, "learning_rate": 1.3037083259047958e-07, "logits/chosen": -2.488002061843872, "logits/rejected": -2.398108959197998, "logps/chosen": -334.773193359375, "logps/rejected": -367.2915344238281, "loss": 0.0267, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.903795063495636, "rewards/margins": 11.955655097961426, "rewards/rejected": -12.859451293945312, "step": 9540 }, { "epoch": 2.3, "learning_rate": 1.2992512034230698e-07, "logits/chosen": -2.5971035957336426, "logits/rejected": -2.5601534843444824, "logps/chosen": -293.69110107421875, "logps/rejected": -454.81707763671875, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": -0.5811969041824341, "rewards/margins": 13.334210395812988, "rewards/rejected": -13.915410041809082, "step": 9550 }, { "epoch": 2.3, "learning_rate": 1.2947940809413444e-07, "logits/chosen": -2.485360622406006, "logits/rejected": -2.418557643890381, "logps/chosen": -231.5789794921875, "logps/rejected": -315.5708923339844, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -1.3320872783660889, "rewards/margins": 9.851500511169434, "rewards/rejected": -11.183588981628418, "step": 9560 }, { "epoch": 2.3, "learning_rate": 1.2903369584596184e-07, "logits/chosen": -2.412652015686035, "logits/rejected": -2.320526599884033, "logps/chosen": -188.39639282226562, "logps/rejected": -285.4320983886719, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -3.2676024436950684, "rewards/margins": 10.311906814575195, "rewards/rejected": -13.579508781433105, "step": 9570 }, { "epoch": 2.31, "learning_rate": 1.2858798359778927e-07, "logits/chosen": -2.516284704208374, "logits/rejected": -2.543175458908081, "logps/chosen": -307.69793701171875, "logps/rejected": -373.61248779296875, "loss": 0.0413, "rewards/accuracies": 1.0, "rewards/chosen": -0.6909498572349548, "rewards/margins": 11.186471939086914, "rewards/rejected": -11.877422332763672, "step": 9580 }, { "epoch": 2.31, "learning_rate": 1.281422713496167e-07, "logits/chosen": -2.5738749504089355, "logits/rejected": -2.4848976135253906, "logps/chosen": -272.22686767578125, "logps/rejected": -343.75701904296875, "loss": 0.0327, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.9130065441131592, "rewards/margins": 9.962735176086426, "rewards/rejected": -11.875740051269531, "step": 9590 }, { "epoch": 2.31, "learning_rate": 1.276965591014441e-07, "logits/chosen": -2.421653985977173, "logits/rejected": -2.423694133758545, "logps/chosen": -233.5714569091797, "logps/rejected": -395.04241943359375, "loss": 0.0425, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3771601617336273, "rewards/margins": 12.321215629577637, "rewards/rejected": -12.69837474822998, "step": 9600 }, { "epoch": 2.31, "eval_logits/chosen": -2.1774609088897705, "eval_logits/rejected": -2.1409823894500732, "eval_logps/chosen": -275.62860107421875, "eval_logps/rejected": -303.3002014160156, "eval_loss": 0.6297720074653625, "eval_rewards/accuracies": 0.6924999952316284, "eval_rewards/chosen": -7.370106220245361, "eval_rewards/margins": 4.006834983825684, "eval_rewards/rejected": -11.376940727233887, "eval_runtime": 134.5205, "eval_samples_per_second": 23.461, "eval_steps_per_second": 0.372, "step": 9600 }, { "epoch": 2.31, "learning_rate": 1.2725084685327153e-07, "logits/chosen": -2.389843225479126, "logits/rejected": -2.4056458473205566, "logps/chosen": -237.8639373779297, "logps/rejected": -419.3231506347656, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": -2.295149326324463, "rewards/margins": 13.496070861816406, "rewards/rejected": -15.791223526000977, "step": 9610 }, { "epoch": 2.32, "learning_rate": 1.2680513460509896e-07, "logits/chosen": -2.48187518119812, "logits/rejected": -2.4992806911468506, "logps/chosen": -288.1156005859375, "logps/rejected": -398.43634033203125, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -0.5226317048072815, "rewards/margins": 12.12777042388916, "rewards/rejected": -12.650402069091797, "step": 9620 }, { "epoch": 2.32, "learning_rate": 1.2635942235692637e-07, "logits/chosen": -2.2290313243865967, "logits/rejected": -2.30084490776062, "logps/chosen": -241.2779998779297, "logps/rejected": -418.93658447265625, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": 0.7349122166633606, "rewards/margins": 16.390789031982422, "rewards/rejected": -15.655878067016602, "step": 9630 }, { "epoch": 2.32, "learning_rate": 1.259137101087538e-07, "logits/chosen": -2.646573543548584, "logits/rejected": -2.5851197242736816, "logps/chosen": -294.6817932128906, "logps/rejected": -338.83929443359375, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -1.8279914855957031, "rewards/margins": 10.240830421447754, "rewards/rejected": -12.068822860717773, "step": 9640 }, { "epoch": 2.32, "learning_rate": 1.254679978605812e-07, "logits/chosen": -2.524639844894409, "logits/rejected": -2.334158182144165, "logps/chosen": -328.20819091796875, "logps/rejected": -369.3468322753906, "loss": 0.0429, "rewards/accuracies": 1.0, "rewards/chosen": -0.4329562783241272, "rewards/margins": 11.776590347290039, "rewards/rejected": -12.20954704284668, "step": 9650 }, { "epoch": 2.32, "learning_rate": 1.2502228561240863e-07, "logits/chosen": -2.627685308456421, "logits/rejected": -2.4850966930389404, "logps/chosen": -336.99603271484375, "logps/rejected": -349.14068603515625, "loss": 0.0373, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9046379327774048, "rewards/margins": 9.74839973449707, "rewards/rejected": -10.653037071228027, "step": 9660 }, { "epoch": 2.33, "learning_rate": 1.2457657336423606e-07, "logits/chosen": -2.630577802658081, "logits/rejected": -2.5514895915985107, "logps/chosen": -304.5028381347656, "logps/rejected": -439.1168518066406, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": 1.3046289682388306, "rewards/margins": 14.980245590209961, "rewards/rejected": -13.675616264343262, "step": 9670 }, { "epoch": 2.33, "learning_rate": 1.2413086111606346e-07, "logits/chosen": -2.4933369159698486, "logits/rejected": -2.3936216831207275, "logps/chosen": -251.20663452148438, "logps/rejected": -359.5411376953125, "loss": 0.041, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.0318059921264648, "rewards/margins": 11.094675064086914, "rewards/rejected": -12.126481056213379, "step": 9680 }, { "epoch": 2.33, "learning_rate": 1.236851488678909e-07, "logits/chosen": -2.588134288787842, "logits/rejected": -2.4921040534973145, "logps/chosen": -315.86358642578125, "logps/rejected": -523.5921630859375, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -0.24267932772636414, "rewards/margins": 13.36680793762207, "rewards/rejected": -13.60948657989502, "step": 9690 }, { "epoch": 2.33, "learning_rate": 1.2323943661971832e-07, "logits/chosen": -2.494471311569214, "logits/rejected": -2.561774730682373, "logps/chosen": -238.9729766845703, "logps/rejected": -367.56536865234375, "loss": 0.0408, "rewards/accuracies": 1.0, "rewards/chosen": 0.7989002466201782, "rewards/margins": 14.336524963378906, "rewards/rejected": -13.537625312805176, "step": 9700 }, { "epoch": 2.34, "learning_rate": 1.2279372437154572e-07, "logits/chosen": -2.5178780555725098, "logits/rejected": -2.3850350379943848, "logps/chosen": -308.134521484375, "logps/rejected": -334.61968994140625, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": 0.12703149020671844, "rewards/margins": 11.759805679321289, "rewards/rejected": -11.632774353027344, "step": 9710 }, { "epoch": 2.34, "learning_rate": 1.2234801212337315e-07, "logits/chosen": -2.6157431602478027, "logits/rejected": -2.5810580253601074, "logps/chosen": -265.21673583984375, "logps/rejected": -375.5794372558594, "loss": 0.046, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4621336460113525, "rewards/margins": 10.865510940551758, "rewards/rejected": -12.327642440795898, "step": 9720 }, { "epoch": 2.34, "learning_rate": 1.2190229987520055e-07, "logits/chosen": -2.553041696548462, "logits/rejected": -2.589735507965088, "logps/chosen": -329.040771484375, "logps/rejected": -461.7129821777344, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": 0.9191287159919739, "rewards/margins": 15.172648429870605, "rewards/rejected": -14.253519058227539, "step": 9730 }, { "epoch": 2.34, "learning_rate": 1.2145658762702798e-07, "logits/chosen": -2.552706241607666, "logits/rejected": -2.5576248168945312, "logps/chosen": -222.6065216064453, "logps/rejected": -308.9010925292969, "loss": 0.0289, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7514709830284119, "rewards/margins": 10.980058670043945, "rewards/rejected": -11.731529235839844, "step": 9740 }, { "epoch": 2.35, "learning_rate": 1.210108753788554e-07, "logits/chosen": -2.3641655445098877, "logits/rejected": -2.234936237335205, "logps/chosen": -336.6866760253906, "logps/rejected": -354.4670715332031, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": -0.40606531500816345, "rewards/margins": 11.603143692016602, "rewards/rejected": -12.009209632873535, "step": 9750 }, { "epoch": 2.35, "learning_rate": 1.2056516313068281e-07, "logits/chosen": -2.3288071155548096, "logits/rejected": -2.1793508529663086, "logps/chosen": -283.40777587890625, "logps/rejected": -471.2051696777344, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -1.462835669517517, "rewards/margins": 15.888590812683105, "rewards/rejected": -17.35142707824707, "step": 9760 }, { "epoch": 2.35, "learning_rate": 1.2011945088251024e-07, "logits/chosen": -2.5297608375549316, "logits/rejected": -2.5485751628875732, "logps/chosen": -210.74423217773438, "logps/rejected": -325.4725036621094, "loss": 0.0128, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6324639320373535, "rewards/margins": 10.859331130981445, "rewards/rejected": -11.49179458618164, "step": 9770 }, { "epoch": 2.35, "learning_rate": 1.1967373863433767e-07, "logits/chosen": -2.7175004482269287, "logits/rejected": -2.4431304931640625, "logps/chosen": -364.6636047363281, "logps/rejected": -330.05438232421875, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -1.0363945960998535, "rewards/margins": 10.648542404174805, "rewards/rejected": -11.684937477111816, "step": 9780 }, { "epoch": 2.36, "learning_rate": 1.1922802638616508e-07, "logits/chosen": -2.5165154933929443, "logits/rejected": -2.367253065109253, "logps/chosen": -265.6626892089844, "logps/rejected": -372.4963684082031, "loss": 0.0234, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.449108600616455, "rewards/margins": 10.355419158935547, "rewards/rejected": -12.804529190063477, "step": 9790 }, { "epoch": 2.36, "learning_rate": 1.187823141379925e-07, "logits/chosen": -2.5441689491271973, "logits/rejected": -2.5348575115203857, "logps/chosen": -245.9356231689453, "logps/rejected": -394.76190185546875, "loss": 0.0364, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.8920745849609375, "rewards/margins": 9.532918930053711, "rewards/rejected": -11.424993515014648, "step": 9800 }, { "epoch": 2.36, "learning_rate": 1.1833660188981992e-07, "logits/chosen": -2.47633695602417, "logits/rejected": -2.4435997009277344, "logps/chosen": -290.0943298339844, "logps/rejected": -384.2179870605469, "loss": 0.0312, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.4887150526046753, "rewards/margins": 10.321699142456055, "rewards/rejected": -11.81041431427002, "step": 9810 }, { "epoch": 2.36, "learning_rate": 1.1789088964164735e-07, "logits/chosen": -2.4126784801483154, "logits/rejected": -2.377180814743042, "logps/chosen": -232.51077270507812, "logps/rejected": -388.77349853515625, "loss": 0.0522, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.0534749031066895, "rewards/margins": 11.633764266967773, "rewards/rejected": -13.687238693237305, "step": 9820 }, { "epoch": 2.37, "learning_rate": 1.1744517739347477e-07, "logits/chosen": -2.3851730823516846, "logits/rejected": -2.3036623001098633, "logps/chosen": -206.77310180664062, "logps/rejected": -308.92852783203125, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": -1.3479121923446655, "rewards/margins": 12.278934478759766, "rewards/rejected": -13.626846313476562, "step": 9830 }, { "epoch": 2.37, "learning_rate": 1.169994651453022e-07, "logits/chosen": -2.593183994293213, "logits/rejected": -2.4439361095428467, "logps/chosen": -338.0233459472656, "logps/rejected": -355.5791320800781, "loss": 0.0298, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.021317005157471, "rewards/margins": 10.08646297454834, "rewards/rejected": -14.107780456542969, "step": 9840 }, { "epoch": 2.37, "learning_rate": 1.1655375289712961e-07, "logits/chosen": -2.450833797454834, "logits/rejected": -2.400195598602295, "logps/chosen": -173.69932556152344, "logps/rejected": -285.7006530761719, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": -2.102134943008423, "rewards/margins": 9.48808479309082, "rewards/rejected": -11.59022045135498, "step": 9850 }, { "epoch": 2.37, "learning_rate": 1.1610804064895703e-07, "logits/chosen": -2.417642116546631, "logits/rejected": -2.4772324562072754, "logps/chosen": -222.88394165039062, "logps/rejected": -345.0111083984375, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -1.2814255952835083, "rewards/margins": 13.139358520507812, "rewards/rejected": -14.420782089233398, "step": 9860 }, { "epoch": 2.38, "learning_rate": 1.1566232840078444e-07, "logits/chosen": -2.580521821975708, "logits/rejected": -2.4695916175842285, "logps/chosen": -320.3534240722656, "logps/rejected": -414.2294921875, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.5010542869567871, "rewards/margins": 11.957185745239258, "rewards/rejected": -12.458239555358887, "step": 9870 }, { "epoch": 2.38, "learning_rate": 1.1521661615261187e-07, "logits/chosen": -2.3649206161499023, "logits/rejected": -2.2867565155029297, "logps/chosen": -215.96047973632812, "logps/rejected": -383.0747985839844, "loss": 0.0231, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.173766136169434, "rewards/margins": 9.420974731445312, "rewards/rejected": -13.594741821289062, "step": 9880 }, { "epoch": 2.38, "learning_rate": 1.1477090390443929e-07, "logits/chosen": -2.579761266708374, "logits/rejected": -2.483165979385376, "logps/chosen": -265.85418701171875, "logps/rejected": -347.1213073730469, "loss": 0.0242, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.4519851207733154, "rewards/margins": 9.690813064575195, "rewards/rejected": -12.14279842376709, "step": 9890 }, { "epoch": 2.38, "learning_rate": 1.143251916562667e-07, "logits/chosen": -2.343977451324463, "logits/rejected": -2.2638773918151855, "logps/chosen": -212.83773803710938, "logps/rejected": -249.4177703857422, "loss": 0.0298, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.4832682609558105, "rewards/margins": 8.23595142364502, "rewards/rejected": -10.719220161437988, "step": 9900 }, { "epoch": 2.39, "learning_rate": 1.1387947940809412e-07, "logits/chosen": -2.5048470497131348, "logits/rejected": -2.4498581886291504, "logps/chosen": -318.59173583984375, "logps/rejected": -404.7265319824219, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -2.5952701568603516, "rewards/margins": 10.584699630737305, "rewards/rejected": -13.179969787597656, "step": 9910 }, { "epoch": 2.39, "learning_rate": 1.1343376715992155e-07, "logits/chosen": -2.5422251224517822, "logits/rejected": -2.4049437046051025, "logps/chosen": -292.94915771484375, "logps/rejected": -344.04693603515625, "loss": 0.0339, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.367684841156006, "rewards/margins": 10.511276245117188, "rewards/rejected": -14.878959655761719, "step": 9920 }, { "epoch": 2.39, "learning_rate": 1.1298805491174897e-07, "logits/chosen": -2.428804874420166, "logits/rejected": -2.3704833984375, "logps/chosen": -240.29928588867188, "logps/rejected": -316.4564514160156, "loss": 0.0342, "rewards/accuracies": 1.0, "rewards/chosen": -2.077416181564331, "rewards/margins": 11.797285079956055, "rewards/rejected": -13.874700546264648, "step": 9930 }, { "epoch": 2.39, "learning_rate": 1.1254234266357638e-07, "logits/chosen": -2.7007205486297607, "logits/rejected": -2.5549190044403076, "logps/chosen": -358.87432861328125, "logps/rejected": -416.8155822753906, "loss": 0.0307, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.512645721435547, "rewards/margins": 10.297310829162598, "rewards/rejected": -12.809954643249512, "step": 9940 }, { "epoch": 2.39, "learning_rate": 1.1209663041540381e-07, "logits/chosen": -2.326416015625, "logits/rejected": -2.425079345703125, "logps/chosen": -279.13165283203125, "logps/rejected": -431.99810791015625, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": -1.2667337656021118, "rewards/margins": 11.267843246459961, "rewards/rejected": -12.534574508666992, "step": 9950 }, { "epoch": 2.4, "learning_rate": 1.1165091816723123e-07, "logits/chosen": -2.422116756439209, "logits/rejected": -2.366100311279297, "logps/chosen": -279.7535705566406, "logps/rejected": -423.58154296875, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": -1.5250468254089355, "rewards/margins": 12.391502380371094, "rewards/rejected": -13.916549682617188, "step": 9960 }, { "epoch": 2.4, "learning_rate": 1.1120520591905864e-07, "logits/chosen": -2.312122344970703, "logits/rejected": -2.350348472595215, "logps/chosen": -152.44276428222656, "logps/rejected": -247.03585815429688, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": -1.245760202407837, "rewards/margins": 10.911714553833008, "rewards/rejected": -12.157475471496582, "step": 9970 }, { "epoch": 2.4, "learning_rate": 1.1075949367088606e-07, "logits/chosen": -2.565394878387451, "logits/rejected": -2.5081286430358887, "logps/chosen": -334.61175537109375, "logps/rejected": -351.79779052734375, "loss": 0.021, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7482221126556396, "rewards/margins": 11.15458869934082, "rewards/rejected": -11.902810096740723, "step": 9980 }, { "epoch": 2.4, "learning_rate": 1.103137814227135e-07, "logits/chosen": -2.3138725757598877, "logits/rejected": -2.324903726577759, "logps/chosen": -213.28121948242188, "logps/rejected": -337.15155029296875, "loss": 0.0215, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.5757930278778076, "rewards/margins": 9.91811466217041, "rewards/rejected": -13.49390697479248, "step": 9990 }, { "epoch": 2.41, "learning_rate": 1.0986806917454092e-07, "logits/chosen": -2.5492286682128906, "logits/rejected": -2.4687278270721436, "logps/chosen": -217.9174346923828, "logps/rejected": -307.69195556640625, "loss": 0.0387, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6853030920028687, "rewards/margins": 11.250697135925293, "rewards/rejected": -12.936001777648926, "step": 10000 }, { "epoch": 2.41, "eval_logits/chosen": -2.216874361038208, "eval_logits/rejected": -2.1790802478790283, "eval_logps/chosen": -275.18701171875, "eval_logps/rejected": -304.8103942871094, "eval_loss": 0.6269313097000122, "eval_rewards/accuracies": 0.6974999904632568, "eval_rewards/chosen": -7.325944900512695, "eval_rewards/margins": 4.202017307281494, "eval_rewards/rejected": -11.527961730957031, "eval_runtime": 134.306, "eval_samples_per_second": 23.499, "eval_steps_per_second": 0.372, "step": 10000 }, { "epoch": 2.41, "learning_rate": 1.0942235692636834e-07, "logits/chosen": -2.2933192253112793, "logits/rejected": -2.280583143234253, "logps/chosen": -287.15576171875, "logps/rejected": -405.9715270996094, "loss": 0.0218, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.2334377765655518, "rewards/margins": 10.364022254943848, "rewards/rejected": -13.59745979309082, "step": 10010 }, { "epoch": 2.41, "learning_rate": 1.0897664467819575e-07, "logits/chosen": -2.4617159366607666, "logits/rejected": -2.2975640296936035, "logps/chosen": -238.12191772460938, "logps/rejected": -386.0503234863281, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -3.2631583213806152, "rewards/margins": 12.070978164672852, "rewards/rejected": -15.334136962890625, "step": 10020 }, { "epoch": 2.41, "learning_rate": 1.0853093243002318e-07, "logits/chosen": -2.505117893218994, "logits/rejected": -2.462644100189209, "logps/chosen": -388.018798828125, "logps/rejected": -362.15228271484375, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -0.5008872747421265, "rewards/margins": 13.235308647155762, "rewards/rejected": -13.73619556427002, "step": 10030 }, { "epoch": 2.42, "learning_rate": 1.080852201818506e-07, "logits/chosen": -2.4457573890686035, "logits/rejected": -2.3285858631134033, "logps/chosen": -289.85003662109375, "logps/rejected": -347.39788818359375, "loss": 0.0192, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.542717933654785, "rewards/margins": 10.714621543884277, "rewards/rejected": -13.257339477539062, "step": 10040 }, { "epoch": 2.42, "learning_rate": 1.0763950793367801e-07, "logits/chosen": -2.528005599975586, "logits/rejected": -2.470735549926758, "logps/chosen": -259.02911376953125, "logps/rejected": -393.9635009765625, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -0.5142813324928284, "rewards/margins": 11.019901275634766, "rewards/rejected": -11.534183502197266, "step": 10050 }, { "epoch": 2.42, "learning_rate": 1.0719379568550543e-07, "logits/chosen": -2.405470371246338, "logits/rejected": -2.355475425720215, "logps/chosen": -290.2284240722656, "logps/rejected": -363.837890625, "loss": 0.0221, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.6940453052520752, "rewards/margins": 10.649921417236328, "rewards/rejected": -12.343965530395508, "step": 10060 }, { "epoch": 2.42, "learning_rate": 1.0674808343733286e-07, "logits/chosen": -2.5033342838287354, "logits/rejected": -2.4000964164733887, "logps/chosen": -280.3537292480469, "logps/rejected": -360.11187744140625, "loss": 0.039, "rewards/accuracies": 1.0, "rewards/chosen": -0.7043596506118774, "rewards/margins": 12.54203987121582, "rewards/rejected": -13.24639892578125, "step": 10070 }, { "epoch": 2.43, "learning_rate": 1.0630237118916027e-07, "logits/chosen": -2.4509224891662598, "logits/rejected": -2.3927853107452393, "logps/chosen": -286.2995300292969, "logps/rejected": -384.1947021484375, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": 0.18650178611278534, "rewards/margins": 14.649968147277832, "rewards/rejected": -14.463467597961426, "step": 10080 }, { "epoch": 2.43, "learning_rate": 1.0585665894098769e-07, "logits/chosen": -2.4938857555389404, "logits/rejected": -2.4885623455047607, "logps/chosen": -278.3260498046875, "logps/rejected": -426.4647521972656, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": -0.8486827611923218, "rewards/margins": 13.495234489440918, "rewards/rejected": -14.343917846679688, "step": 10090 }, { "epoch": 2.43, "learning_rate": 1.0541094669281511e-07, "logits/chosen": -2.4637417793273926, "logits/rejected": -2.4643568992614746, "logps/chosen": -325.77374267578125, "logps/rejected": -400.1005554199219, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/chosen": 0.8435616493225098, "rewards/margins": 15.150815963745117, "rewards/rejected": -14.30725383758545, "step": 10100 }, { "epoch": 2.43, "learning_rate": 1.0496523444464254e-07, "logits/chosen": -2.7043046951293945, "logits/rejected": -2.307955741882324, "logps/chosen": -301.4166259765625, "logps/rejected": -315.764404296875, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": 0.18431058526039124, "rewards/margins": 13.029945373535156, "rewards/rejected": -12.845632553100586, "step": 10110 }, { "epoch": 2.44, "learning_rate": 1.0451952219646995e-07, "logits/chosen": -2.6169772148132324, "logits/rejected": -2.601414680480957, "logps/chosen": -270.9599609375, "logps/rejected": -337.7686767578125, "loss": 0.0564, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.4154558181762695, "rewards/margins": 9.172788619995117, "rewards/rejected": -13.588244438171387, "step": 10120 }, { "epoch": 2.44, "learning_rate": 1.0407380994829737e-07, "logits/chosen": -2.5294604301452637, "logits/rejected": -2.43257999420166, "logps/chosen": -332.90924072265625, "logps/rejected": -346.5657653808594, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -1.092760443687439, "rewards/margins": 12.378458023071289, "rewards/rejected": -13.471220016479492, "step": 10130 }, { "epoch": 2.44, "learning_rate": 1.0362809770012478e-07, "logits/chosen": -2.434919595718384, "logits/rejected": -2.4524292945861816, "logps/chosen": -225.8126220703125, "logps/rejected": -380.1116638183594, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": -0.25910013914108276, "rewards/margins": 11.829145431518555, "rewards/rejected": -12.088244438171387, "step": 10140 }, { "epoch": 2.44, "learning_rate": 1.0318238545195221e-07, "logits/chosen": -2.574103832244873, "logits/rejected": -2.5287487506866455, "logps/chosen": -272.11920166015625, "logps/rejected": -312.4232482910156, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": -1.0098073482513428, "rewards/margins": 10.273530960083008, "rewards/rejected": -11.283336639404297, "step": 10150 }, { "epoch": 2.45, "learning_rate": 1.0273667320377964e-07, "logits/chosen": -2.452679395675659, "logits/rejected": -2.2785086631774902, "logps/chosen": -238.67758178710938, "logps/rejected": -286.9755859375, "loss": 0.0364, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8719562292098999, "rewards/margins": 10.701383590698242, "rewards/rejected": -11.57334041595459, "step": 10160 }, { "epoch": 2.45, "learning_rate": 1.0229096095560706e-07, "logits/chosen": -2.5121803283691406, "logits/rejected": -2.3672566413879395, "logps/chosen": -350.19622802734375, "logps/rejected": -336.66656494140625, "loss": 0.0395, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.574341297149658, "rewards/margins": 10.227441787719727, "rewards/rejected": -12.801783561706543, "step": 10170 }, { "epoch": 2.45, "learning_rate": 1.0184524870743448e-07, "logits/chosen": -2.5346298217773438, "logits/rejected": -2.4361207485198975, "logps/chosen": -271.7481994628906, "logps/rejected": -352.82403564453125, "loss": 0.0449, "rewards/accuracies": 1.0, "rewards/chosen": -1.7823295593261719, "rewards/margins": 10.890006065368652, "rewards/rejected": -12.672337532043457, "step": 10180 }, { "epoch": 2.45, "learning_rate": 1.013995364592619e-07, "logits/chosen": -2.5501441955566406, "logits/rejected": -2.5360169410705566, "logps/chosen": -307.5736999511719, "logps/rejected": -310.0274963378906, "loss": 0.0408, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.210803508758545, "rewards/margins": 9.402273178100586, "rewards/rejected": -12.613077163696289, "step": 10190 }, { "epoch": 2.45, "learning_rate": 1.0095382421108932e-07, "logits/chosen": -2.447296142578125, "logits/rejected": -2.3583357334136963, "logps/chosen": -268.5548095703125, "logps/rejected": -295.54937744140625, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": -0.6434500813484192, "rewards/margins": 10.180092811584473, "rewards/rejected": -10.823541641235352, "step": 10200 }, { "epoch": 2.46, "learning_rate": 1.0050811196291674e-07, "logits/chosen": -2.5113818645477295, "logits/rejected": -2.446319103240967, "logps/chosen": -300.185791015625, "logps/rejected": -336.2559509277344, "loss": 0.0473, "rewards/accuracies": 1.0, "rewards/chosen": 0.6623687744140625, "rewards/margins": 14.569671630859375, "rewards/rejected": -13.907302856445312, "step": 10210 }, { "epoch": 2.46, "learning_rate": 1.0006239971474415e-07, "logits/chosen": -2.4419267177581787, "logits/rejected": -2.31793212890625, "logps/chosen": -250.54574584960938, "logps/rejected": -264.55316162109375, "loss": 0.0375, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.5165772438049316, "rewards/margins": 8.454904556274414, "rewards/rejected": -11.97148323059082, "step": 10220 }, { "epoch": 2.46, "learning_rate": 9.961668746657158e-08, "logits/chosen": -2.562485694885254, "logits/rejected": -2.5261874198913574, "logps/chosen": -320.7162170410156, "logps/rejected": -355.2144470214844, "loss": 0.024, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5536615252494812, "rewards/margins": 12.61359691619873, "rewards/rejected": -13.167257308959961, "step": 10230 }, { "epoch": 2.46, "learning_rate": 9.9170975218399e-08, "logits/chosen": -2.394862413406372, "logits/rejected": -2.293825626373291, "logps/chosen": -270.94769287109375, "logps/rejected": -383.794189453125, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -1.8689486980438232, "rewards/margins": 13.144645690917969, "rewards/rejected": -15.013595581054688, "step": 10240 }, { "epoch": 2.47, "learning_rate": 9.872526297022641e-08, "logits/chosen": -2.740175247192383, "logits/rejected": -2.5539283752441406, "logps/chosen": -316.5394592285156, "logps/rejected": -389.9117736816406, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": -1.2171639204025269, "rewards/margins": 10.731700897216797, "rewards/rejected": -11.94886589050293, "step": 10250 }, { "epoch": 2.47, "learning_rate": 9.827955072205383e-08, "logits/chosen": -2.3584835529327393, "logits/rejected": -2.5055510997772217, "logps/chosen": -175.42633056640625, "logps/rejected": -365.27197265625, "loss": 0.0468, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.867166042327881, "rewards/margins": 12.676493644714355, "rewards/rejected": -15.543660163879395, "step": 10260 }, { "epoch": 2.47, "learning_rate": 9.783383847388126e-08, "logits/chosen": -2.3794188499450684, "logits/rejected": -2.3353278636932373, "logps/chosen": -258.2748107910156, "logps/rejected": -432.15594482421875, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -1.8665927648544312, "rewards/margins": 14.133447647094727, "rewards/rejected": -16.000041961669922, "step": 10270 }, { "epoch": 2.47, "learning_rate": 9.738812622570868e-08, "logits/chosen": -2.614894390106201, "logits/rejected": -2.5153114795684814, "logps/chosen": -283.90240478515625, "logps/rejected": -327.84539794921875, "loss": 0.0616, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.9870588779449463, "rewards/margins": 9.350723266601562, "rewards/rejected": -12.33778190612793, "step": 10280 }, { "epoch": 2.48, "learning_rate": 9.694241397753609e-08, "logits/chosen": -2.5317180156707764, "logits/rejected": -2.4985158443450928, "logps/chosen": -231.2266387939453, "logps/rejected": -353.6574401855469, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.8197168111801147, "rewards/margins": 11.543524742126465, "rewards/rejected": -12.363243103027344, "step": 10290 }, { "epoch": 2.48, "learning_rate": 9.649670172936351e-08, "logits/chosen": -2.6909403800964355, "logits/rejected": -2.625136613845825, "logps/chosen": -310.2020263671875, "logps/rejected": -420.2967224121094, "loss": 0.024, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6183617115020752, "rewards/margins": 12.573843002319336, "rewards/rejected": -14.192204475402832, "step": 10300 }, { "epoch": 2.48, "learning_rate": 9.605098948119094e-08, "logits/chosen": -2.2787041664123535, "logits/rejected": -2.1850972175598145, "logps/chosen": -325.05218505859375, "logps/rejected": -388.6729431152344, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": -1.3511285781860352, "rewards/margins": 11.135826110839844, "rewards/rejected": -12.486954689025879, "step": 10310 }, { "epoch": 2.48, "learning_rate": 9.560527723301835e-08, "logits/chosen": -2.4997646808624268, "logits/rejected": -2.3415632247924805, "logps/chosen": -335.80474853515625, "logps/rejected": -336.45587158203125, "loss": 0.0453, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.522719383239746, "rewards/margins": 9.912080764770508, "rewards/rejected": -14.434802055358887, "step": 10320 }, { "epoch": 2.49, "learning_rate": 9.515956498484578e-08, "logits/chosen": -2.378953456878662, "logits/rejected": -2.4727540016174316, "logps/chosen": -284.6646423339844, "logps/rejected": -384.29364013671875, "loss": 0.0204, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6046100854873657, "rewards/margins": 13.905375480651855, "rewards/rejected": -14.509984970092773, "step": 10330 }, { "epoch": 2.49, "learning_rate": 9.47138527366732e-08, "logits/chosen": -2.497859239578247, "logits/rejected": -2.4668257236480713, "logps/chosen": -297.99224853515625, "logps/rejected": -406.8845520019531, "loss": 0.0394, "rewards/accuracies": 1.0, "rewards/chosen": 0.11674115806818008, "rewards/margins": 15.078516006469727, "rewards/rejected": -14.961773872375488, "step": 10340 }, { "epoch": 2.49, "learning_rate": 9.426814048850063e-08, "logits/chosen": -2.6369550228118896, "logits/rejected": -2.558452606201172, "logps/chosen": -214.35311889648438, "logps/rejected": -282.9107971191406, "loss": 0.0342, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0868337154388428, "rewards/margins": 11.665234565734863, "rewards/rejected": -12.752067565917969, "step": 10350 }, { "epoch": 2.49, "learning_rate": 9.382242824032804e-08, "logits/chosen": -2.5463333129882812, "logits/rejected": -2.383127450942993, "logps/chosen": -247.4314727783203, "logps/rejected": -374.2950439453125, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -1.5774767398834229, "rewards/margins": 14.915138244628906, "rewards/rejected": -16.492618560791016, "step": 10360 }, { "epoch": 2.5, "learning_rate": 9.337671599215546e-08, "logits/chosen": -2.7446866035461426, "logits/rejected": -2.57399582862854, "logps/chosen": -344.39300537109375, "logps/rejected": -409.7808532714844, "loss": 0.0357, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4495247602462769, "rewards/margins": 11.880117416381836, "rewards/rejected": -13.32964038848877, "step": 10370 }, { "epoch": 2.5, "learning_rate": 9.293100374398288e-08, "logits/chosen": -2.536357879638672, "logits/rejected": -2.47178053855896, "logps/chosen": -237.9107208251953, "logps/rejected": -413.24957275390625, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -2.1901845932006836, "rewards/margins": 12.308076858520508, "rewards/rejected": -14.498262405395508, "step": 10380 }, { "epoch": 2.5, "learning_rate": 9.24852914958103e-08, "logits/chosen": -2.4459424018859863, "logits/rejected": -2.4301600456237793, "logps/chosen": -276.6459045410156, "logps/rejected": -333.7275390625, "loss": 0.0445, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.7565501928329468, "rewards/margins": 9.935433387756348, "rewards/rejected": -11.691983222961426, "step": 10390 }, { "epoch": 2.5, "learning_rate": 9.203957924763772e-08, "logits/chosen": -2.4842679500579834, "logits/rejected": -2.438427209854126, "logps/chosen": -342.13653564453125, "logps/rejected": -422.8092346191406, "loss": 0.043, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.9871963262557983, "rewards/margins": 12.639472961425781, "rewards/rejected": -14.626668930053711, "step": 10400 }, { "epoch": 2.5, "eval_logits/chosen": -2.2662575244903564, "eval_logits/rejected": -2.230071783065796, "eval_logps/chosen": -274.166748046875, "eval_logps/rejected": -305.313720703125, "eval_loss": 0.6375707387924194, "eval_rewards/accuracies": 0.6924999952316284, "eval_rewards/chosen": -7.223921298980713, "eval_rewards/margins": 4.354369640350342, "eval_rewards/rejected": -11.578290939331055, "eval_runtime": 133.4124, "eval_samples_per_second": 23.656, "eval_steps_per_second": 0.375, "step": 10400 }, { "epoch": 2.51, "learning_rate": 9.159386699946514e-08, "logits/chosen": -2.407238483428955, "logits/rejected": -2.3920178413391113, "logps/chosen": -291.1453857421875, "logps/rejected": -341.76104736328125, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": 0.034220896661281586, "rewards/margins": 11.897378921508789, "rewards/rejected": -11.863157272338867, "step": 10410 }, { "epoch": 2.51, "learning_rate": 9.114815475129255e-08, "logits/chosen": -2.4947426319122314, "logits/rejected": -2.42100191116333, "logps/chosen": -235.4882049560547, "logps/rejected": -341.2593078613281, "loss": 0.0247, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.277120590209961, "rewards/margins": 11.250730514526367, "rewards/rejected": -14.527850151062012, "step": 10420 }, { "epoch": 2.51, "learning_rate": 9.070244250311998e-08, "logits/chosen": -2.3231470584869385, "logits/rejected": -2.484201431274414, "logps/chosen": -327.2215881347656, "logps/rejected": -483.61114501953125, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -0.9986634254455566, "rewards/margins": 13.383715629577637, "rewards/rejected": -14.382379531860352, "step": 10430 }, { "epoch": 2.51, "learning_rate": 9.02567302549474e-08, "logits/chosen": -2.365403890609741, "logits/rejected": -2.3459725379943848, "logps/chosen": -302.86309814453125, "logps/rejected": -335.1816101074219, "loss": 0.0325, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.254432201385498, "rewards/margins": 9.722726821899414, "rewards/rejected": -11.977158546447754, "step": 10440 }, { "epoch": 2.52, "learning_rate": 8.981101800677482e-08, "logits/chosen": -2.540252208709717, "logits/rejected": -2.496572494506836, "logps/chosen": -291.1900939941406, "logps/rejected": -422.4532165527344, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -0.518183171749115, "rewards/margins": 13.844454765319824, "rewards/rejected": -14.362638473510742, "step": 10450 }, { "epoch": 2.52, "learning_rate": 8.936530575860223e-08, "logits/chosen": -2.5834574699401855, "logits/rejected": -2.5244648456573486, "logps/chosen": -265.43572998046875, "logps/rejected": -409.3478088378906, "loss": 0.0397, "rewards/accuracies": 1.0, "rewards/chosen": 0.29316291213035583, "rewards/margins": 13.712129592895508, "rewards/rejected": -13.418966293334961, "step": 10460 }, { "epoch": 2.52, "learning_rate": 8.891959351042966e-08, "logits/chosen": -2.4341185092926025, "logits/rejected": -2.363210439682007, "logps/chosen": -258.18609619140625, "logps/rejected": -299.75970458984375, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": 0.06365285068750381, "rewards/margins": 10.271844863891602, "rewards/rejected": -10.208192825317383, "step": 10470 }, { "epoch": 2.52, "learning_rate": 8.847388126225708e-08, "logits/chosen": -2.6811318397521973, "logits/rejected": -2.5494422912597656, "logps/chosen": -232.71731567382812, "logps/rejected": -343.6226501464844, "loss": 0.0504, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2904490232467651, "rewards/margins": 12.419363975524902, "rewards/rejected": -13.709814071655273, "step": 10480 }, { "epoch": 2.52, "learning_rate": 8.80281690140845e-08, "logits/chosen": -2.5047972202301025, "logits/rejected": -2.453812599182129, "logps/chosen": -192.08062744140625, "logps/rejected": -309.33734130859375, "loss": 0.0493, "rewards/accuracies": 1.0, "rewards/chosen": -0.5243643522262573, "rewards/margins": 10.752581596374512, "rewards/rejected": -11.276945114135742, "step": 10490 }, { "epoch": 2.53, "learning_rate": 8.758245676591194e-08, "logits/chosen": -2.4655632972717285, "logits/rejected": -2.5261590480804443, "logps/chosen": -268.0155334472656, "logps/rejected": -339.0793151855469, "loss": 0.036, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.6569886207580566, "rewards/margins": 10.690444946289062, "rewards/rejected": -13.347434997558594, "step": 10500 }, { "epoch": 2.53, "learning_rate": 8.713674451773935e-08, "logits/chosen": -2.485379219055176, "logits/rejected": -2.406667709350586, "logps/chosen": -222.50927734375, "logps/rejected": -289.3091735839844, "loss": 0.0345, "rewards/accuracies": 1.0, "rewards/chosen": -1.4753628969192505, "rewards/margins": 10.559330940246582, "rewards/rejected": -12.034693717956543, "step": 10510 }, { "epoch": 2.53, "learning_rate": 8.669103226956677e-08, "logits/chosen": -2.5506882667541504, "logits/rejected": -2.442164659500122, "logps/chosen": -228.38003540039062, "logps/rejected": -344.5788269042969, "loss": 0.0413, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.098649501800537, "rewards/margins": 11.076091766357422, "rewards/rejected": -13.1747407913208, "step": 10520 }, { "epoch": 2.53, "learning_rate": 8.624532002139418e-08, "logits/chosen": -2.312070846557617, "logits/rejected": -2.3365490436553955, "logps/chosen": -238.8922576904297, "logps/rejected": -301.13873291015625, "loss": 0.0376, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.9768481254577637, "rewards/margins": 8.800875663757324, "rewards/rejected": -12.777724266052246, "step": 10530 }, { "epoch": 2.54, "learning_rate": 8.579960777322161e-08, "logits/chosen": -2.561098575592041, "logits/rejected": -2.6573328971862793, "logps/chosen": -239.29849243164062, "logps/rejected": -347.3624267578125, "loss": 0.0416, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.0099174976348877, "rewards/margins": 9.177538871765137, "rewards/rejected": -11.187456130981445, "step": 10540 }, { "epoch": 2.54, "learning_rate": 8.535389552504903e-08, "logits/chosen": -2.4423718452453613, "logits/rejected": -2.1314046382904053, "logps/chosen": -230.24740600585938, "logps/rejected": -328.03314208984375, "loss": 0.0298, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.904605865478516, "rewards/margins": 10.746953964233398, "rewards/rejected": -15.651559829711914, "step": 10550 }, { "epoch": 2.54, "learning_rate": 8.490818327687645e-08, "logits/chosen": -2.4472155570983887, "logits/rejected": -2.2407352924346924, "logps/chosen": -276.05633544921875, "logps/rejected": -419.28509521484375, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": -1.4732625484466553, "rewards/margins": 15.026802062988281, "rewards/rejected": -16.500064849853516, "step": 10560 }, { "epoch": 2.54, "learning_rate": 8.446247102870386e-08, "logits/chosen": -2.511277437210083, "logits/rejected": -2.3505561351776123, "logps/chosen": -234.39535522460938, "logps/rejected": -350.11541748046875, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/chosen": -2.86519193649292, "rewards/margins": 11.936873435974121, "rewards/rejected": -14.8020658493042, "step": 10570 }, { "epoch": 2.55, "learning_rate": 8.401675878053129e-08, "logits/chosen": -2.5857067108154297, "logits/rejected": -2.601583957672119, "logps/chosen": -329.2457580566406, "logps/rejected": -431.29754638671875, "loss": 0.0371, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.9668831825256348, "rewards/margins": 11.971174240112305, "rewards/rejected": -15.938056945800781, "step": 10580 }, { "epoch": 2.55, "learning_rate": 8.357104653235871e-08, "logits/chosen": -2.5475380420684814, "logits/rejected": -2.4051592350006104, "logps/chosen": -291.6007080078125, "logps/rejected": -381.60699462890625, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": -1.024180293083191, "rewards/margins": 11.200544357299805, "rewards/rejected": -12.224725723266602, "step": 10590 }, { "epoch": 2.55, "learning_rate": 8.312533428418612e-08, "logits/chosen": -2.477518320083618, "logits/rejected": -2.4938597679138184, "logps/chosen": -257.22735595703125, "logps/rejected": -313.45501708984375, "loss": 0.0416, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.8546251058578491, "rewards/margins": 9.755518913269043, "rewards/rejected": -11.610143661499023, "step": 10600 }, { "epoch": 2.55, "learning_rate": 8.267962203601354e-08, "logits/chosen": -2.3990440368652344, "logits/rejected": -2.452619791030884, "logps/chosen": -266.38232421875, "logps/rejected": -357.98199462890625, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": -1.0757076740264893, "rewards/margins": 11.956494331359863, "rewards/rejected": -13.032203674316406, "step": 10610 }, { "epoch": 2.56, "learning_rate": 8.223390978784097e-08, "logits/chosen": -2.5130615234375, "logits/rejected": -2.624833583831787, "logps/chosen": -169.1503448486328, "logps/rejected": -341.31689453125, "loss": 0.03, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0461747646331787, "rewards/margins": 11.008197784423828, "rewards/rejected": -12.05437183380127, "step": 10620 }, { "epoch": 2.56, "learning_rate": 8.178819753966839e-08, "logits/chosen": -2.6103034019470215, "logits/rejected": -2.4376797676086426, "logps/chosen": -363.60845947265625, "logps/rejected": -427.32806396484375, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": 0.0050119878724217415, "rewards/margins": 13.436140060424805, "rewards/rejected": -13.431129455566406, "step": 10630 }, { "epoch": 2.56, "learning_rate": 8.13424852914958e-08, "logits/chosen": -2.5464351177215576, "logits/rejected": -2.5220837593078613, "logps/chosen": -256.637939453125, "logps/rejected": -359.1075134277344, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -1.0264241695404053, "rewards/margins": 12.260004043579102, "rewards/rejected": -13.286428451538086, "step": 10640 }, { "epoch": 2.56, "learning_rate": 8.089677304332322e-08, "logits/chosen": -2.358281373977661, "logits/rejected": -2.372849225997925, "logps/chosen": -262.013671875, "logps/rejected": -370.379638671875, "loss": 0.0374, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 1.2720009088516235, "rewards/margins": 14.581092834472656, "rewards/rejected": -13.309089660644531, "step": 10650 }, { "epoch": 2.57, "learning_rate": 8.045106079515065e-08, "logits/chosen": -2.3271663188934326, "logits/rejected": -2.308911085128784, "logps/chosen": -289.9702453613281, "logps/rejected": -416.28643798828125, "loss": 0.0329, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6120532751083374, "rewards/margins": 13.261553764343262, "rewards/rejected": -14.87360668182373, "step": 10660 }, { "epoch": 2.57, "learning_rate": 8.000534854697808e-08, "logits/chosen": -2.602839231491089, "logits/rejected": -2.441680908203125, "logps/chosen": -235.3236083984375, "logps/rejected": -355.11566162109375, "loss": 0.0448, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.6526877880096436, "rewards/margins": 10.503377914428711, "rewards/rejected": -13.15606689453125, "step": 10670 }, { "epoch": 2.57, "learning_rate": 7.955963629880549e-08, "logits/chosen": -2.670300245285034, "logits/rejected": -2.5688750743865967, "logps/chosen": -276.58660888671875, "logps/rejected": -432.313232421875, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": -0.6207407712936401, "rewards/margins": 14.357336044311523, "rewards/rejected": -14.978078842163086, "step": 10680 }, { "epoch": 2.57, "learning_rate": 7.911392405063291e-08, "logits/chosen": -2.5545012950897217, "logits/rejected": -2.5492682456970215, "logps/chosen": -211.1891326904297, "logps/rejected": -282.841552734375, "loss": 0.0367, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9025828242301941, "rewards/margins": 9.527538299560547, "rewards/rejected": -10.430120468139648, "step": 10690 }, { "epoch": 2.58, "learning_rate": 7.866821180246034e-08, "logits/chosen": -2.7213170528411865, "logits/rejected": -2.7161808013916016, "logps/chosen": -258.9945068359375, "logps/rejected": -443.43414306640625, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -0.9200226664543152, "rewards/margins": 13.025611877441406, "rewards/rejected": -13.945635795593262, "step": 10700 }, { "epoch": 2.58, "learning_rate": 7.822249955428775e-08, "logits/chosen": -2.664929151535034, "logits/rejected": -2.4846885204315186, "logps/chosen": -241.22171020507812, "logps/rejected": -303.971923828125, "loss": 0.0359, "rewards/accuracies": 1.0, "rewards/chosen": -0.0947069302201271, "rewards/margins": 11.653878211975098, "rewards/rejected": -11.748584747314453, "step": 10710 }, { "epoch": 2.58, "learning_rate": 7.777678730611517e-08, "logits/chosen": -2.4323935508728027, "logits/rejected": -2.452493906021118, "logps/chosen": -386.83465576171875, "logps/rejected": -371.39849853515625, "loss": 0.0425, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.2871506214141846, "rewards/margins": 11.200230598449707, "rewards/rejected": -14.487380981445312, "step": 10720 }, { "epoch": 2.58, "learning_rate": 7.733107505794259e-08, "logits/chosen": -2.409231424331665, "logits/rejected": -2.3314690589904785, "logps/chosen": -220.2698211669922, "logps/rejected": -360.07476806640625, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -2.3193631172180176, "rewards/margins": 12.024897575378418, "rewards/rejected": -14.344259262084961, "step": 10730 }, { "epoch": 2.58, "learning_rate": 7.688536280977002e-08, "logits/chosen": -2.380180835723877, "logits/rejected": -2.447178602218628, "logps/chosen": -223.9288787841797, "logps/rejected": -345.01123046875, "loss": 0.0337, "rewards/accuracies": 1.0, "rewards/chosen": -1.7370744943618774, "rewards/margins": 11.357762336730957, "rewards/rejected": -13.09483814239502, "step": 10740 }, { "epoch": 2.59, "learning_rate": 7.643965056159743e-08, "logits/chosen": -2.5712685585021973, "logits/rejected": -2.560652256011963, "logps/chosen": -239.720458984375, "logps/rejected": -363.42645263671875, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7917028665542603, "rewards/margins": 10.572433471679688, "rewards/rejected": -11.3641357421875, "step": 10750 }, { "epoch": 2.59, "learning_rate": 7.599393831342485e-08, "logits/chosen": -2.3512864112854004, "logits/rejected": -2.2663590908050537, "logps/chosen": -251.39840698242188, "logps/rejected": -387.4175109863281, "loss": 0.0259, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3710943460464478, "rewards/margins": 15.084039688110352, "rewards/rejected": -16.455135345458984, "step": 10760 }, { "epoch": 2.59, "learning_rate": 7.554822606525226e-08, "logits/chosen": -2.5970118045806885, "logits/rejected": -2.5169131755828857, "logps/chosen": -300.6658630371094, "logps/rejected": -375.5133361816406, "loss": 0.0293, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.221968173980713, "rewards/margins": 9.254558563232422, "rewards/rejected": -11.476526260375977, "step": 10770 }, { "epoch": 2.59, "learning_rate": 7.510251381707969e-08, "logits/chosen": -2.3693273067474365, "logits/rejected": -2.297300338745117, "logps/chosen": -257.8289794921875, "logps/rejected": -330.83441162109375, "loss": 0.0433, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.5106990337371826, "rewards/margins": 9.142082214355469, "rewards/rejected": -10.652780532836914, "step": 10780 }, { "epoch": 2.6, "learning_rate": 7.465680156890711e-08, "logits/chosen": -2.4781227111816406, "logits/rejected": -2.488508701324463, "logps/chosen": -253.02694702148438, "logps/rejected": -379.565185546875, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": 0.2573682367801666, "rewards/margins": 13.718050956726074, "rewards/rejected": -13.460683822631836, "step": 10790 }, { "epoch": 2.6, "learning_rate": 7.421108932073453e-08, "logits/chosen": -2.562633991241455, "logits/rejected": -2.611072063446045, "logps/chosen": -240.0339813232422, "logps/rejected": -447.7969665527344, "loss": 0.0577, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.4941372871398926, "rewards/margins": 11.667159080505371, "rewards/rejected": -14.161297798156738, "step": 10800 }, { "epoch": 2.6, "eval_logits/chosen": -2.2341954708099365, "eval_logits/rejected": -2.196829319000244, "eval_logps/chosen": -278.65399169921875, "eval_logps/rejected": -309.213623046875, "eval_loss": 0.6290140748023987, "eval_rewards/accuracies": 0.6924999952316284, "eval_rewards/chosen": -7.67264461517334, "eval_rewards/margins": 4.295636177062988, "eval_rewards/rejected": -11.968280792236328, "eval_runtime": 133.4667, "eval_samples_per_second": 23.646, "eval_steps_per_second": 0.375, "step": 10800 }, { "epoch": 2.6, "learning_rate": 7.376537707256194e-08, "logits/chosen": -2.5272409915924072, "logits/rejected": -2.4531893730163574, "logps/chosen": -248.57968139648438, "logps/rejected": -338.3380432128906, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -1.2790850400924683, "rewards/margins": 10.274438858032227, "rewards/rejected": -11.553524017333984, "step": 10810 }, { "epoch": 2.6, "learning_rate": 7.331966482438937e-08, "logits/chosen": -2.4732887744903564, "logits/rejected": -2.432586908340454, "logps/chosen": -226.4634246826172, "logps/rejected": -326.443603515625, "loss": 0.0276, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.4939928948879242, "rewards/margins": 12.521749496459961, "rewards/rejected": -12.02775764465332, "step": 10820 }, { "epoch": 2.61, "learning_rate": 7.287395257621679e-08, "logits/chosen": -2.3963871002197266, "logits/rejected": -2.37272572517395, "logps/chosen": -323.8963928222656, "logps/rejected": -337.9401550292969, "loss": 0.0268, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.5154550075531006, "rewards/margins": 10.958158493041992, "rewards/rejected": -13.473612785339355, "step": 10830 }, { "epoch": 2.61, "learning_rate": 7.24282403280442e-08, "logits/chosen": -2.660106658935547, "logits/rejected": -2.6419565677642822, "logps/chosen": -323.8818359375, "logps/rejected": -458.4462890625, "loss": 0.0233, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8459736108779907, "rewards/margins": 13.067835807800293, "rewards/rejected": -13.913810729980469, "step": 10840 }, { "epoch": 2.61, "learning_rate": 7.198252807987163e-08, "logits/chosen": -2.5293502807617188, "logits/rejected": -2.537257194519043, "logps/chosen": -194.8105010986328, "logps/rejected": -336.81829833984375, "loss": 0.0415, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.9545141458511353, "rewards/margins": 10.744138717651367, "rewards/rejected": -12.698652267456055, "step": 10850 }, { "epoch": 2.61, "learning_rate": 7.153681583169906e-08, "logits/chosen": -2.5184950828552246, "logits/rejected": -2.4280002117156982, "logps/chosen": -196.51992797851562, "logps/rejected": -296.865966796875, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": -0.675471842288971, "rewards/margins": 12.040105819702148, "rewards/rejected": -12.71557903289795, "step": 10860 }, { "epoch": 2.62, "learning_rate": 7.109110358352648e-08, "logits/chosen": -2.5834603309631348, "logits/rejected": -2.5854382514953613, "logps/chosen": -298.9248046875, "logps/rejected": -413.6004943847656, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -1.8956855535507202, "rewards/margins": 14.540181159973145, "rewards/rejected": -16.435867309570312, "step": 10870 }, { "epoch": 2.62, "learning_rate": 7.06453913353539e-08, "logits/chosen": -2.5333352088928223, "logits/rejected": -2.5061662197113037, "logps/chosen": -258.09515380859375, "logps/rejected": -396.55780029296875, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": 0.07149648666381836, "rewards/margins": 14.085748672485352, "rewards/rejected": -14.014251708984375, "step": 10880 }, { "epoch": 2.62, "learning_rate": 7.019967908718131e-08, "logits/chosen": -2.518632411956787, "logits/rejected": -2.4010226726531982, "logps/chosen": -271.84112548828125, "logps/rejected": -346.02154541015625, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -1.4661548137664795, "rewards/margins": 12.127985954284668, "rewards/rejected": -13.594141006469727, "step": 10890 }, { "epoch": 2.62, "learning_rate": 6.975396683900874e-08, "logits/chosen": -2.5858657360076904, "logits/rejected": -2.621828317642212, "logps/chosen": -223.5819549560547, "logps/rejected": -403.63055419921875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.2753188610076904, "rewards/margins": 11.72504997253418, "rewards/rejected": -14.00036907196045, "step": 10900 }, { "epoch": 2.63, "learning_rate": 6.930825459083616e-08, "logits/chosen": -2.711143970489502, "logits/rejected": -2.5317392349243164, "logps/chosen": -312.88897705078125, "logps/rejected": -298.1280822753906, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -1.8430429697036743, "rewards/margins": 10.042176246643066, "rewards/rejected": -11.885217666625977, "step": 10910 }, { "epoch": 2.63, "learning_rate": 6.886254234266357e-08, "logits/chosen": -2.4132256507873535, "logits/rejected": -2.299051284790039, "logps/chosen": -294.41107177734375, "logps/rejected": -299.05609130859375, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": -1.5870959758758545, "rewards/margins": 9.86145305633545, "rewards/rejected": -11.448549270629883, "step": 10920 }, { "epoch": 2.63, "learning_rate": 6.841683009449099e-08, "logits/chosen": -2.2700419425964355, "logits/rejected": -2.2279114723205566, "logps/chosen": -242.2595977783203, "logps/rejected": -358.4086608886719, "loss": 0.0173, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.5513057708740234, "rewards/margins": 10.892278671264648, "rewards/rejected": -14.443583488464355, "step": 10930 }, { "epoch": 2.63, "learning_rate": 6.797111784631842e-08, "logits/chosen": -2.401989221572876, "logits/rejected": -2.4412052631378174, "logps/chosen": -284.970458984375, "logps/rejected": -384.956787109375, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/chosen": -0.6625072360038757, "rewards/margins": 14.448089599609375, "rewards/rejected": -15.110595703125, "step": 10940 }, { "epoch": 2.64, "learning_rate": 6.752540559814583e-08, "logits/chosen": -2.272688627243042, "logits/rejected": -2.2353696823120117, "logps/chosen": -220.97085571289062, "logps/rejected": -279.1014404296875, "loss": 0.0266, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.1295342445373535, "rewards/margins": 10.96465015411377, "rewards/rejected": -13.094184875488281, "step": 10950 }, { "epoch": 2.64, "learning_rate": 6.707969334997325e-08, "logits/chosen": -2.442444324493408, "logits/rejected": -2.391512393951416, "logps/chosen": -264.117919921875, "logps/rejected": -352.73162841796875, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -0.863391101360321, "rewards/margins": 11.013391494750977, "rewards/rejected": -11.876781463623047, "step": 10960 }, { "epoch": 2.64, "learning_rate": 6.663398110180066e-08, "logits/chosen": -2.403355836868286, "logits/rejected": -2.4246935844421387, "logps/chosen": -193.77670288085938, "logps/rejected": -314.63702392578125, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -2.0767998695373535, "rewards/margins": 11.021893501281738, "rewards/rejected": -13.09869384765625, "step": 10970 }, { "epoch": 2.64, "learning_rate": 6.61882688536281e-08, "logits/chosen": -2.401864767074585, "logits/rejected": -2.3092455863952637, "logps/chosen": -332.11383056640625, "logps/rejected": -365.3248596191406, "loss": 0.036, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.2405102252960205, "rewards/margins": 13.149075508117676, "rewards/rejected": -14.3895845413208, "step": 10980 }, { "epoch": 2.65, "learning_rate": 6.574255660545551e-08, "logits/chosen": -2.717160701751709, "logits/rejected": -2.5159428119659424, "logps/chosen": -383.21368408203125, "logps/rejected": -334.6822509765625, "loss": 0.0354, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.6736847162246704, "rewards/margins": 9.387896537780762, "rewards/rejected": -11.0615816116333, "step": 10990 }, { "epoch": 2.65, "learning_rate": 6.529684435728293e-08, "logits/chosen": -2.600236415863037, "logits/rejected": -2.3790392875671387, "logps/chosen": -285.241455078125, "logps/rejected": -324.14349365234375, "loss": 0.0212, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.9113547801971436, "rewards/margins": 9.267632484436035, "rewards/rejected": -13.178987503051758, "step": 11000 }, { "epoch": 2.65, "learning_rate": 6.485113210911034e-08, "logits/chosen": -2.3917620182037354, "logits/rejected": -2.319744825363159, "logps/chosen": -240.36331176757812, "logps/rejected": -335.1694641113281, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -2.0478901863098145, "rewards/margins": 11.181989669799805, "rewards/rejected": -13.229879379272461, "step": 11010 }, { "epoch": 2.65, "learning_rate": 6.440541986093779e-08, "logits/chosen": -2.3351759910583496, "logits/rejected": -2.4058279991149902, "logps/chosen": -224.92684936523438, "logps/rejected": -374.34259033203125, "loss": 0.019, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.7083711624145508, "rewards/margins": 13.359003067016602, "rewards/rejected": -15.067373275756836, "step": 11020 }, { "epoch": 2.65, "learning_rate": 6.39597076127652e-08, "logits/chosen": -2.4472146034240723, "logits/rejected": -2.3554482460021973, "logps/chosen": -245.9796142578125, "logps/rejected": -351.9076232910156, "loss": 0.0351, "rewards/accuracies": 1.0, "rewards/chosen": -0.9210950136184692, "rewards/margins": 13.461766242980957, "rewards/rejected": -14.382861137390137, "step": 11030 }, { "epoch": 2.66, "learning_rate": 6.351399536459262e-08, "logits/chosen": -2.6213226318359375, "logits/rejected": -2.6294121742248535, "logps/chosen": -273.3938903808594, "logps/rejected": -370.6047668457031, "loss": 0.0187, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.065000295639038, "rewards/margins": 11.00334358215332, "rewards/rejected": -13.068344116210938, "step": 11040 }, { "epoch": 2.66, "learning_rate": 6.306828311642005e-08, "logits/chosen": -2.588347911834717, "logits/rejected": -2.5161221027374268, "logps/chosen": -238.6955108642578, "logps/rejected": -331.37200927734375, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": -1.2676331996917725, "rewards/margins": 11.658307075500488, "rewards/rejected": -12.925939559936523, "step": 11050 }, { "epoch": 2.66, "learning_rate": 6.262257086824746e-08, "logits/chosen": -2.4580318927764893, "logits/rejected": -2.496865749359131, "logps/chosen": -248.82595825195312, "logps/rejected": -391.84716796875, "loss": 0.0293, "rewards/accuracies": 1.0, "rewards/chosen": -1.505751609802246, "rewards/margins": 14.90924072265625, "rewards/rejected": -16.41499137878418, "step": 11060 }, { "epoch": 2.66, "learning_rate": 6.217685862007488e-08, "logits/chosen": -2.487553834915161, "logits/rejected": -2.5703344345092773, "logps/chosen": -274.4038391113281, "logps/rejected": -426.31304931640625, "loss": 0.0294, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0016109943389893, "rewards/margins": 12.330926895141602, "rewards/rejected": -13.332539558410645, "step": 11070 }, { "epoch": 2.67, "learning_rate": 6.17311463719023e-08, "logits/chosen": -2.2672083377838135, "logits/rejected": -2.265392541885376, "logps/chosen": -226.40185546875, "logps/rejected": -280.76251220703125, "loss": 0.0337, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.9002182483673096, "rewards/margins": 8.221508979797363, "rewards/rejected": -12.121726036071777, "step": 11080 }, { "epoch": 2.67, "learning_rate": 6.128543412372972e-08, "logits/chosen": -2.476393938064575, "logits/rejected": -2.365628957748413, "logps/chosen": -314.09381103515625, "logps/rejected": -401.48175048828125, "loss": 0.0229, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1945843696594238, "rewards/margins": 10.73908519744873, "rewards/rejected": -11.93366813659668, "step": 11090 }, { "epoch": 2.67, "learning_rate": 6.083972187555714e-08, "logits/chosen": -2.393922805786133, "logits/rejected": -2.3239188194274902, "logps/chosen": -202.66156005859375, "logps/rejected": -266.5211486816406, "loss": 0.0403, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.612478256225586, "rewards/margins": 12.316023826599121, "rewards/rejected": -13.928503036499023, "step": 11100 }, { "epoch": 2.67, "learning_rate": 6.039400962738456e-08, "logits/chosen": -2.3518242835998535, "logits/rejected": -2.378859043121338, "logps/chosen": -454.7975158691406, "logps/rejected": -392.9176940917969, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": -2.3876655101776123, "rewards/margins": 12.167783737182617, "rewards/rejected": -14.555447578430176, "step": 11110 }, { "epoch": 2.68, "learning_rate": 5.994829737921197e-08, "logits/chosen": -2.493408679962158, "logits/rejected": -2.4440033435821533, "logps/chosen": -247.04806518554688, "logps/rejected": -359.9694519042969, "loss": 0.0347, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.431443691253662, "rewards/margins": 10.620159149169922, "rewards/rejected": -14.051603317260742, "step": 11120 }, { "epoch": 2.68, "learning_rate": 5.9502585131039395e-08, "logits/chosen": -2.4963178634643555, "logits/rejected": -2.298515558242798, "logps/chosen": -268.14385986328125, "logps/rejected": -344.11749267578125, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": -2.784672498703003, "rewards/margins": 11.713040351867676, "rewards/rejected": -14.497714042663574, "step": 11130 }, { "epoch": 2.68, "learning_rate": 5.9056872882866825e-08, "logits/chosen": -2.4330027103424072, "logits/rejected": -2.4529852867126465, "logps/chosen": -238.3878936767578, "logps/rejected": -320.8956604003906, "loss": 0.0492, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.8928139209747314, "rewards/margins": 9.035645484924316, "rewards/rejected": -12.928457260131836, "step": 11140 }, { "epoch": 2.68, "learning_rate": 5.861116063469424e-08, "logits/chosen": -2.3670287132263184, "logits/rejected": -2.414227247238159, "logps/chosen": -202.35137939453125, "logps/rejected": -309.3493957519531, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -0.23711788654327393, "rewards/margins": 12.99688720703125, "rewards/rejected": -13.234004020690918, "step": 11150 }, { "epoch": 2.69, "learning_rate": 5.8165448386521663e-08, "logits/chosen": -2.4794349670410156, "logits/rejected": -2.357036590576172, "logps/chosen": -299.997802734375, "logps/rejected": -373.39080810546875, "loss": 0.0994, "rewards/accuracies": 1.0, "rewards/chosen": -0.8509117960929871, "rewards/margins": 15.069299697875977, "rewards/rejected": -15.920211791992188, "step": 11160 }, { "epoch": 2.69, "learning_rate": 5.771973613834908e-08, "logits/chosen": -2.2667782306671143, "logits/rejected": -2.3640787601470947, "logps/chosen": -257.46453857421875, "logps/rejected": -464.90216064453125, "loss": 0.0448, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.3583171367645264, "rewards/margins": 12.97047233581543, "rewards/rejected": -15.328790664672852, "step": 11170 }, { "epoch": 2.69, "learning_rate": 5.72740238901765e-08, "logits/chosen": -2.4249043464660645, "logits/rejected": -2.1784474849700928, "logps/chosen": -285.65008544921875, "logps/rejected": -348.6196594238281, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.3535035848617554, "rewards/margins": 12.062604904174805, "rewards/rejected": -13.416107177734375, "step": 11180 }, { "epoch": 2.69, "learning_rate": 5.682831164200392e-08, "logits/chosen": -2.6384506225585938, "logits/rejected": -2.4709858894348145, "logps/chosen": -302.586669921875, "logps/rejected": -415.0191345214844, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -0.9068059921264648, "rewards/margins": 13.053243637084961, "rewards/rejected": -13.960049629211426, "step": 11190 }, { "epoch": 2.7, "learning_rate": 5.638259939383134e-08, "logits/chosen": -2.524169921875, "logits/rejected": -2.40922474861145, "logps/chosen": -360.9150390625, "logps/rejected": -444.46533203125, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -1.248106837272644, "rewards/margins": 13.789751052856445, "rewards/rejected": -15.037857055664062, "step": 11200 }, { "epoch": 2.7, "eval_logits/chosen": -2.2006473541259766, "eval_logits/rejected": -2.162346839904785, "eval_logps/chosen": -274.2283935546875, "eval_logps/rejected": -304.8287048339844, "eval_loss": 0.6259632706642151, "eval_rewards/accuracies": 0.6825000047683716, "eval_rewards/chosen": -7.230083465576172, "eval_rewards/margins": 4.299704074859619, "eval_rewards/rejected": -11.52978801727295, "eval_runtime": 134.2311, "eval_samples_per_second": 23.512, "eval_steps_per_second": 0.372, "step": 11200 }, { "epoch": 2.7, "learning_rate": 5.593688714565876e-08, "logits/chosen": -2.505527973175049, "logits/rejected": -2.483219861984253, "logps/chosen": -285.79278564453125, "logps/rejected": -396.4827880859375, "loss": 0.0474, "rewards/accuracies": 1.0, "rewards/chosen": -2.0627493858337402, "rewards/margins": 10.589037895202637, "rewards/rejected": -12.651786804199219, "step": 11210 }, { "epoch": 2.7, "learning_rate": 5.549117489748618e-08, "logits/chosen": -2.4316835403442383, "logits/rejected": -2.2311272621154785, "logps/chosen": -407.55078125, "logps/rejected": -559.0494995117188, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": 1.7516244649887085, "rewards/margins": 19.262540817260742, "rewards/rejected": -17.51091766357422, "step": 11220 }, { "epoch": 2.7, "learning_rate": 5.50454626493136e-08, "logits/chosen": -2.3240840435028076, "logits/rejected": -2.3160452842712402, "logps/chosen": -206.2075653076172, "logps/rejected": -300.61151123046875, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": -0.806415855884552, "rewards/margins": 12.24956226348877, "rewards/rejected": -13.055978775024414, "step": 11230 }, { "epoch": 2.71, "learning_rate": 5.4599750401141025e-08, "logits/chosen": -2.434230089187622, "logits/rejected": -2.3133277893066406, "logps/chosen": -243.75863647460938, "logps/rejected": -367.0157775878906, "loss": 0.0245, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.664422035217285, "rewards/margins": 10.782770156860352, "rewards/rejected": -13.447192192077637, "step": 11240 }, { "epoch": 2.71, "learning_rate": 5.415403815296844e-08, "logits/chosen": -2.3377509117126465, "logits/rejected": -2.351390838623047, "logps/chosen": -274.16522216796875, "logps/rejected": -353.56158447265625, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": -0.5112389922142029, "rewards/margins": 13.706448554992676, "rewards/rejected": -14.217687606811523, "step": 11250 }, { "epoch": 2.71, "learning_rate": 5.3708325904795864e-08, "logits/chosen": -2.5061848163604736, "logits/rejected": -2.394094944000244, "logps/chosen": -280.7466735839844, "logps/rejected": -361.4125061035156, "loss": 0.0191, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.9356305599212646, "rewards/margins": 10.402562141418457, "rewards/rejected": -12.3381929397583, "step": 11260 }, { "epoch": 2.71, "learning_rate": 5.326261365662328e-08, "logits/chosen": -2.435429096221924, "logits/rejected": -2.3512542247772217, "logps/chosen": -250.2201385498047, "logps/rejected": -404.38128662109375, "loss": 0.0343, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6449331641197205, "rewards/margins": 14.451985359191895, "rewards/rejected": -15.096919059753418, "step": 11270 }, { "epoch": 2.71, "learning_rate": 5.28169014084507e-08, "logits/chosen": -2.5936787128448486, "logits/rejected": -2.437343120574951, "logps/chosen": -452.4847106933594, "logps/rejected": -542.2637939453125, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": 1.385621190071106, "rewards/margins": 18.973867416381836, "rewards/rejected": -17.588247299194336, "step": 11280 }, { "epoch": 2.72, "learning_rate": 5.237118916027812e-08, "logits/chosen": -2.3915462493896484, "logits/rejected": -2.471947193145752, "logps/chosen": -235.27487182617188, "logps/rejected": -347.845458984375, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -1.0457872152328491, "rewards/margins": 12.638503074645996, "rewards/rejected": -13.684290885925293, "step": 11290 }, { "epoch": 2.72, "learning_rate": 5.192547691210554e-08, "logits/chosen": -2.5435686111450195, "logits/rejected": -2.5178191661834717, "logps/chosen": -286.24530029296875, "logps/rejected": -341.6558837890625, "loss": 0.0325, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.60090970993042, "rewards/margins": 9.717835426330566, "rewards/rejected": -13.318742752075195, "step": 11300 }, { "epoch": 2.72, "learning_rate": 5.147976466393296e-08, "logits/chosen": -2.3367373943328857, "logits/rejected": -2.254424810409546, "logps/chosen": -204.71485900878906, "logps/rejected": -354.184814453125, "loss": 0.0259, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.430279731750488, "rewards/margins": 10.637048721313477, "rewards/rejected": -15.067327499389648, "step": 11310 }, { "epoch": 2.72, "learning_rate": 5.103405241576039e-08, "logits/chosen": -2.33821177482605, "logits/rejected": -2.2306008338928223, "logps/chosen": -188.8552703857422, "logps/rejected": -264.550048828125, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -2.7913947105407715, "rewards/margins": 10.940762519836426, "rewards/rejected": -13.732156753540039, "step": 11320 }, { "epoch": 2.73, "learning_rate": 5.05883401675878e-08, "logits/chosen": -2.525364637374878, "logits/rejected": -2.46457839012146, "logps/chosen": -343.2807312011719, "logps/rejected": -489.612060546875, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": 0.20165547728538513, "rewards/margins": 17.081844329833984, "rewards/rejected": -16.88018798828125, "step": 11330 }, { "epoch": 2.73, "learning_rate": 5.0142627919415226e-08, "logits/chosen": -2.560027599334717, "logits/rejected": -2.4247658252716064, "logps/chosen": -215.8699493408203, "logps/rejected": -268.9886169433594, "loss": 0.023, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.39333075284957886, "rewards/margins": 11.755952835083008, "rewards/rejected": -11.362622261047363, "step": 11340 }, { "epoch": 2.73, "learning_rate": 4.969691567124264e-08, "logits/chosen": -2.461838960647583, "logits/rejected": -2.450381278991699, "logps/chosen": -364.3572998046875, "logps/rejected": -452.86669921875, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.1634099930524826, "rewards/margins": 12.84306526184082, "rewards/rejected": -13.006475448608398, "step": 11350 }, { "epoch": 2.73, "learning_rate": 4.9251203423070065e-08, "logits/chosen": -2.484093189239502, "logits/rejected": -2.3913586139678955, "logps/chosen": -239.50033569335938, "logps/rejected": -251.77700805664062, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -1.1334115266799927, "rewards/margins": 10.335721015930176, "rewards/rejected": -11.469133377075195, "step": 11360 }, { "epoch": 2.74, "learning_rate": 4.880549117489748e-08, "logits/chosen": -2.5914254188537598, "logits/rejected": -2.37621808052063, "logps/chosen": -341.57904052734375, "logps/rejected": -336.15582275390625, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": -1.0589287281036377, "rewards/margins": 11.350578308105469, "rewards/rejected": -12.409507751464844, "step": 11370 }, { "epoch": 2.74, "learning_rate": 4.8359778926724904e-08, "logits/chosen": -2.3758625984191895, "logits/rejected": -2.3536620140075684, "logps/chosen": -271.1437683105469, "logps/rejected": -358.9661560058594, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.1041805744171143, "rewards/margins": 12.512085914611816, "rewards/rejected": -13.616266250610352, "step": 11380 }, { "epoch": 2.74, "learning_rate": 4.791406667855232e-08, "logits/chosen": -2.556595802307129, "logits/rejected": -2.530003309249878, "logps/chosen": -297.84808349609375, "logps/rejected": -388.65423583984375, "loss": 0.0312, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1992433071136475, "rewards/margins": 12.781665802001953, "rewards/rejected": -13.98090934753418, "step": 11390 }, { "epoch": 2.74, "learning_rate": 4.746835443037975e-08, "logits/chosen": -2.4237866401672363, "logits/rejected": -2.2597763538360596, "logps/chosen": -250.51962280273438, "logps/rejected": -363.7515869140625, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -1.5471956729888916, "rewards/margins": 13.305665969848633, "rewards/rejected": -14.852861404418945, "step": 11400 }, { "epoch": 2.75, "learning_rate": 4.7022642182207165e-08, "logits/chosen": -2.556197166442871, "logits/rejected": -2.604642391204834, "logps/chosen": -412.33660888671875, "logps/rejected": -461.094482421875, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": -0.17471656203269958, "rewards/margins": 13.933385848999023, "rewards/rejected": -14.108102798461914, "step": 11410 }, { "epoch": 2.75, "learning_rate": 4.657692993403459e-08, "logits/chosen": -2.2716872692108154, "logits/rejected": -2.2458138465881348, "logps/chosen": -219.63784790039062, "logps/rejected": -359.21185302734375, "loss": 0.0199, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.3408005237579346, "rewards/margins": 11.46550178527832, "rewards/rejected": -13.806303024291992, "step": 11420 }, { "epoch": 2.75, "learning_rate": 4.6131217685862004e-08, "logits/chosen": -2.3509669303894043, "logits/rejected": -2.422544002532959, "logps/chosen": -205.3550567626953, "logps/rejected": -342.86895751953125, "loss": 0.0244, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3986648321151733, "rewards/margins": 13.367385864257812, "rewards/rejected": -14.766050338745117, "step": 11430 }, { "epoch": 2.75, "learning_rate": 4.5685505437689427e-08, "logits/chosen": -2.606081962585449, "logits/rejected": -2.4485602378845215, "logps/chosen": -339.7007141113281, "logps/rejected": -361.31219482421875, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -1.799737572669983, "rewards/margins": 10.338393211364746, "rewards/rejected": -12.138131141662598, "step": 11440 }, { "epoch": 2.76, "learning_rate": 4.523979318951684e-08, "logits/chosen": -2.4293484687805176, "logits/rejected": -2.3089652061462402, "logps/chosen": -288.80133056640625, "logps/rejected": -384.42108154296875, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -2.2991480827331543, "rewards/margins": 11.283964157104492, "rewards/rejected": -13.583112716674805, "step": 11450 }, { "epoch": 2.76, "learning_rate": 4.4794080941344265e-08, "logits/chosen": -2.436283588409424, "logits/rejected": -2.402207851409912, "logps/chosen": -303.0555114746094, "logps/rejected": -428.24371337890625, "loss": 0.0271, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.7075694799423218, "rewards/margins": 13.223734855651855, "rewards/rejected": -14.931304931640625, "step": 11460 }, { "epoch": 2.76, "learning_rate": 4.434836869317168e-08, "logits/chosen": -2.375192165374756, "logits/rejected": -2.3349125385284424, "logps/chosen": -361.15667724609375, "logps/rejected": -416.8773498535156, "loss": 0.0327, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.413932800292969, "rewards/margins": 10.136090278625488, "rewards/rejected": -14.550024032592773, "step": 11470 }, { "epoch": 2.76, "learning_rate": 4.3902656444999104e-08, "logits/chosen": -2.4863932132720947, "logits/rejected": -2.433408498764038, "logps/chosen": -290.609375, "logps/rejected": -377.8664245605469, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": -0.6029821634292603, "rewards/margins": 13.398452758789062, "rewards/rejected": -14.001434326171875, "step": 11480 }, { "epoch": 2.77, "learning_rate": 4.345694419682653e-08, "logits/chosen": -2.5686421394348145, "logits/rejected": -2.42051100730896, "logps/chosen": -258.9626770019531, "logps/rejected": -362.4114074707031, "loss": 0.0309, "rewards/accuracies": 1.0, "rewards/chosen": -1.0931127071380615, "rewards/margins": 11.589851379394531, "rewards/rejected": -12.682964324951172, "step": 11490 }, { "epoch": 2.77, "learning_rate": 4.301123194865395e-08, "logits/chosen": -2.4282636642456055, "logits/rejected": -2.4071927070617676, "logps/chosen": -294.930419921875, "logps/rejected": -374.0691223144531, "loss": 0.0275, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.04943211004137993, "rewards/margins": 12.023344039916992, "rewards/rejected": -11.973912239074707, "step": 11500 }, { "epoch": 2.77, "learning_rate": 4.2565519700481366e-08, "logits/chosen": -2.457975387573242, "logits/rejected": -2.432142734527588, "logps/chosen": -220.9159698486328, "logps/rejected": -428.310546875, "loss": 0.0738, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2498180866241455, "rewards/margins": 14.47473430633545, "rewards/rejected": -14.724553108215332, "step": 11510 }, { "epoch": 2.77, "learning_rate": 4.211980745230879e-08, "logits/chosen": -2.371051788330078, "logits/rejected": -2.4538283348083496, "logps/chosen": -263.51220703125, "logps/rejected": -334.24102783203125, "loss": 0.033, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.7677242755889893, "rewards/margins": 11.65418815612793, "rewards/rejected": -13.421911239624023, "step": 11520 }, { "epoch": 2.77, "learning_rate": 4.1674095204136205e-08, "logits/chosen": -2.494645595550537, "logits/rejected": -2.414339303970337, "logps/chosen": -265.6726989746094, "logps/rejected": -371.0367736816406, "loss": 0.0582, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7944694757461548, "rewards/margins": 10.575716972351074, "rewards/rejected": -11.370186805725098, "step": 11530 }, { "epoch": 2.78, "learning_rate": 4.122838295596363e-08, "logits/chosen": -2.43843412399292, "logits/rejected": -2.4583516120910645, "logps/chosen": -208.3157958984375, "logps/rejected": -390.5905456542969, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -1.8241792917251587, "rewards/margins": 14.587809562683105, "rewards/rejected": -16.4119873046875, "step": 11540 }, { "epoch": 2.78, "learning_rate": 4.0782670707791043e-08, "logits/chosen": -2.3243870735168457, "logits/rejected": -2.2328686714172363, "logps/chosen": -242.69369506835938, "logps/rejected": -397.82794189453125, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": -1.1840770244598389, "rewards/margins": 14.214498519897461, "rewards/rejected": -15.398576736450195, "step": 11550 }, { "epoch": 2.78, "learning_rate": 4.0336958459618466e-08, "logits/chosen": -2.587477922439575, "logits/rejected": -2.4498391151428223, "logps/chosen": -353.0970764160156, "logps/rejected": -425.1826171875, "loss": 0.0613, "rewards/accuracies": 1.0, "rewards/chosen": -1.048505425453186, "rewards/margins": 13.219686508178711, "rewards/rejected": -14.26819133758545, "step": 11560 }, { "epoch": 2.78, "learning_rate": 3.989124621144589e-08, "logits/chosen": -2.463712692260742, "logits/rejected": -2.4487709999084473, "logps/chosen": -251.2293701171875, "logps/rejected": -348.6514892578125, "loss": 0.0485, "rewards/accuracies": 1.0, "rewards/chosen": -1.0674989223480225, "rewards/margins": 12.788418769836426, "rewards/rejected": -13.855916976928711, "step": 11570 }, { "epoch": 2.79, "learning_rate": 3.944553396327331e-08, "logits/chosen": -2.357205867767334, "logits/rejected": -2.3697304725646973, "logps/chosen": -231.0236358642578, "logps/rejected": -308.3990478515625, "loss": 0.0381, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.9180500507354736, "rewards/margins": 9.993398666381836, "rewards/rejected": -13.91144847869873, "step": 11580 }, { "epoch": 2.79, "learning_rate": 3.899982171510073e-08, "logits/chosen": -2.4156291484832764, "logits/rejected": -2.42352557182312, "logps/chosen": -244.0848388671875, "logps/rejected": -332.1519470214844, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -3.404517650604248, "rewards/margins": 10.540016174316406, "rewards/rejected": -13.944534301757812, "step": 11590 }, { "epoch": 2.79, "learning_rate": 3.855410946692815e-08, "logits/chosen": -2.407479763031006, "logits/rejected": -2.460360288619995, "logps/chosen": -388.8629455566406, "logps/rejected": -623.1227416992188, "loss": 0.0328, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.769385576248169, "rewards/margins": 13.049238204956055, "rewards/rejected": -15.818624496459961, "step": 11600 }, { "epoch": 2.79, "eval_logits/chosen": -2.176734447479248, "eval_logits/rejected": -2.13875675201416, "eval_logps/chosen": -278.0234069824219, "eval_logps/rejected": -309.6459655761719, "eval_loss": 0.6324948668479919, "eval_rewards/accuracies": 0.6949999928474426, "eval_rewards/chosen": -7.609586715698242, "eval_rewards/margins": 4.401934623718262, "eval_rewards/rejected": -12.011521339416504, "eval_runtime": 132.5011, "eval_samples_per_second": 23.819, "eval_steps_per_second": 0.377, "step": 11600 }, { "epoch": 2.79, "learning_rate": 3.8108397218755566e-08, "logits/chosen": -2.354337453842163, "logits/rejected": -2.3427820205688477, "logps/chosen": -253.9640655517578, "logps/rejected": -356.81402587890625, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -1.540785312652588, "rewards/margins": 9.694784164428711, "rewards/rejected": -11.235569953918457, "step": 11610 }, { "epoch": 2.8, "learning_rate": 3.766268497058299e-08, "logits/chosen": -2.416257381439209, "logits/rejected": -2.4568800926208496, "logps/chosen": -358.51763916015625, "logps/rejected": -404.95635986328125, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -0.8332737684249878, "rewards/margins": 12.491031646728516, "rewards/rejected": -13.324304580688477, "step": 11620 }, { "epoch": 2.8, "learning_rate": 3.721697272241041e-08, "logits/chosen": -2.5190837383270264, "logits/rejected": -2.3154964447021484, "logps/chosen": -274.7784423828125, "logps/rejected": -369.0568542480469, "loss": 0.0358, "rewards/accuracies": 1.0, "rewards/chosen": 0.48575448989868164, "rewards/margins": 15.805773735046387, "rewards/rejected": -15.320019721984863, "step": 11630 }, { "epoch": 2.8, "learning_rate": 3.677126047423783e-08, "logits/chosen": -2.546103000640869, "logits/rejected": -2.5743775367736816, "logps/chosen": -263.50146484375, "logps/rejected": -404.35931396484375, "loss": 0.0317, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9340923428535461, "rewards/margins": 11.29251480102539, "rewards/rejected": -12.226606369018555, "step": 11640 }, { "epoch": 2.8, "learning_rate": 3.632554822606525e-08, "logits/chosen": -2.4657437801361084, "logits/rejected": -2.487313747406006, "logps/chosen": -268.81475830078125, "logps/rejected": -417.0496520996094, "loss": 0.0417, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2531711459159851, "rewards/margins": 14.597402572631836, "rewards/rejected": -14.850573539733887, "step": 11650 }, { "epoch": 2.81, "learning_rate": 3.5879835977892673e-08, "logits/chosen": -2.512868881225586, "logits/rejected": -2.487403392791748, "logps/chosen": -297.1785888671875, "logps/rejected": -486.1136169433594, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -1.2635729312896729, "rewards/margins": 12.216447830200195, "rewards/rejected": -13.480020523071289, "step": 11660 }, { "epoch": 2.81, "learning_rate": 3.5434123729720096e-08, "logits/chosen": -2.3326098918914795, "logits/rejected": -2.2055530548095703, "logps/chosen": -284.63873291015625, "logps/rejected": -447.083251953125, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": -0.730940043926239, "rewards/margins": 12.714864730834961, "rewards/rejected": -13.445805549621582, "step": 11670 }, { "epoch": 2.81, "learning_rate": 3.498841148154751e-08, "logits/chosen": -2.3177332878112793, "logits/rejected": -2.3130042552948, "logps/chosen": -243.593017578125, "logps/rejected": -440.8395080566406, "loss": 0.0525, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.358832359313965, "rewards/margins": 10.459243774414062, "rewards/rejected": -14.818075180053711, "step": 11680 }, { "epoch": 2.81, "learning_rate": 3.4542699233374935e-08, "logits/chosen": -2.4748282432556152, "logits/rejected": -2.444901943206787, "logps/chosen": -351.2947082519531, "logps/rejected": -421.42169189453125, "loss": 0.0338, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3172955513000488, "rewards/margins": 11.108989715576172, "rewards/rejected": -12.426286697387695, "step": 11690 }, { "epoch": 2.82, "learning_rate": 3.409698698520235e-08, "logits/chosen": -2.603511095046997, "logits/rejected": -2.533597707748413, "logps/chosen": -396.93988037109375, "logps/rejected": -450.39996337890625, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -0.6498171091079712, "rewards/margins": 13.239534378051758, "rewards/rejected": -13.889350891113281, "step": 11700 }, { "epoch": 2.82, "learning_rate": 3.3651274737029774e-08, "logits/chosen": -2.388713836669922, "logits/rejected": -2.4331793785095215, "logps/chosen": -243.74618530273438, "logps/rejected": -333.30755615234375, "loss": 0.0306, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.776449203491211, "rewards/margins": 9.73577880859375, "rewards/rejected": -12.512228012084961, "step": 11710 }, { "epoch": 2.82, "learning_rate": 3.320556248885719e-08, "logits/chosen": -2.6002614498138428, "logits/rejected": -2.538332223892212, "logps/chosen": -363.6770935058594, "logps/rejected": -447.22161865234375, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/chosen": -1.1041080951690674, "rewards/margins": 12.499316215515137, "rewards/rejected": -13.603424072265625, "step": 11720 }, { "epoch": 2.82, "learning_rate": 3.275985024068461e-08, "logits/chosen": -2.549431324005127, "logits/rejected": -2.378579616546631, "logps/chosen": -354.511962890625, "logps/rejected": -424.26287841796875, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -0.980708122253418, "rewards/margins": 12.861539840698242, "rewards/rejected": -13.842247009277344, "step": 11730 }, { "epoch": 2.83, "learning_rate": 3.2314137992512035e-08, "logits/chosen": -2.317164659500122, "logits/rejected": -2.1640431880950928, "logps/chosen": -331.0914306640625, "logps/rejected": -359.4324645996094, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": -4.1258039474487305, "rewards/margins": 10.919458389282227, "rewards/rejected": -15.045262336730957, "step": 11740 }, { "epoch": 2.83, "learning_rate": 3.186842574433946e-08, "logits/chosen": -2.5726194381713867, "logits/rejected": -2.4622859954833984, "logps/chosen": -256.5213928222656, "logps/rejected": -366.3427734375, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": -0.2834794521331787, "rewards/margins": 13.488180160522461, "rewards/rejected": -13.771661758422852, "step": 11750 }, { "epoch": 2.83, "learning_rate": 3.1422713496166874e-08, "logits/chosen": -2.4840989112854004, "logits/rejected": -2.4598278999328613, "logps/chosen": -219.7997589111328, "logps/rejected": -393.89788818359375, "loss": 0.0463, "rewards/accuracies": 1.0, "rewards/chosen": -0.23123475909233093, "rewards/margins": 14.300189018249512, "rewards/rejected": -14.531425476074219, "step": 11760 }, { "epoch": 2.83, "learning_rate": 3.09770012479943e-08, "logits/chosen": -2.6010186672210693, "logits/rejected": -2.457659959793091, "logps/chosen": -313.2583923339844, "logps/rejected": -553.55322265625, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": 1.0449435710906982, "rewards/margins": 21.27800178527832, "rewards/rejected": -20.23305892944336, "step": 11770 }, { "epoch": 2.84, "learning_rate": 3.053128899982171e-08, "logits/chosen": -2.424867868423462, "logits/rejected": -2.2959485054016113, "logps/chosen": -297.36724853515625, "logps/rejected": -291.015380859375, "loss": 0.0428, "rewards/accuracies": 1.0, "rewards/chosen": -2.1360621452331543, "rewards/margins": 10.758634567260742, "rewards/rejected": -12.894696235656738, "step": 11780 }, { "epoch": 2.84, "learning_rate": 3.0085576751649136e-08, "logits/chosen": -2.5902180671691895, "logits/rejected": -2.3549537658691406, "logps/chosen": -416.59326171875, "logps/rejected": -386.328857421875, "loss": 0.0158, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.3209166526794434, "rewards/margins": 11.371603012084961, "rewards/rejected": -14.692520141601562, "step": 11790 }, { "epoch": 2.84, "learning_rate": 2.963986450347655e-08, "logits/chosen": -2.6646437644958496, "logits/rejected": -2.477449893951416, "logps/chosen": -300.90216064453125, "logps/rejected": -407.06573486328125, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": -1.187294602394104, "rewards/margins": 13.52137279510498, "rewards/rejected": -14.708666801452637, "step": 11800 }, { "epoch": 2.84, "learning_rate": 2.9194152255303974e-08, "logits/chosen": -2.601910352706909, "logits/rejected": -2.3476791381835938, "logps/chosen": -271.9495849609375, "logps/rejected": -325.62652587890625, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": -1.8956215381622314, "rewards/margins": 10.339010238647461, "rewards/rejected": -12.234630584716797, "step": 11810 }, { "epoch": 2.84, "learning_rate": 2.8748440007131394e-08, "logits/chosen": -2.474547863006592, "logits/rejected": -2.5126729011535645, "logps/chosen": -239.34182739257812, "logps/rejected": -356.5572814941406, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -1.9234554767608643, "rewards/margins": 10.614490509033203, "rewards/rejected": -12.537944793701172, "step": 11820 }, { "epoch": 2.85, "learning_rate": 2.8302727758958813e-08, "logits/chosen": -2.447876453399658, "logits/rejected": -2.273216962814331, "logps/chosen": -272.24359130859375, "logps/rejected": -399.24481201171875, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -0.6911055445671082, "rewards/margins": 13.995025634765625, "rewards/rejected": -14.686132431030273, "step": 11830 }, { "epoch": 2.85, "learning_rate": 2.7857015510786233e-08, "logits/chosen": -2.432492971420288, "logits/rejected": -2.529512405395508, "logps/chosen": -275.92083740234375, "logps/rejected": -437.6058654785156, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -1.2148009538650513, "rewards/margins": 12.627706527709961, "rewards/rejected": -13.842506408691406, "step": 11840 }, { "epoch": 2.85, "learning_rate": 2.7411303262613655e-08, "logits/chosen": -2.1875171661376953, "logits/rejected": -2.139634847640991, "logps/chosen": -189.89190673828125, "logps/rejected": -250.09097290039062, "loss": 0.0405, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.3723418712615967, "rewards/margins": 8.872549057006836, "rewards/rejected": -12.244890213012695, "step": 11850 }, { "epoch": 2.85, "learning_rate": 2.6965591014441075e-08, "logits/chosen": -2.489630699157715, "logits/rejected": -2.4272329807281494, "logps/chosen": -273.12139892578125, "logps/rejected": -408.4613342285156, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -0.5734224319458008, "rewards/margins": 11.171777725219727, "rewards/rejected": -11.745201110839844, "step": 11860 }, { "epoch": 2.86, "learning_rate": 2.6519878766268494e-08, "logits/chosen": -2.526616334915161, "logits/rejected": -2.511665105819702, "logps/chosen": -364.6877136230469, "logps/rejected": -448.8736267089844, "loss": 0.0297, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9400394558906555, "rewards/margins": 14.034416198730469, "rewards/rejected": -14.974454879760742, "step": 11870 }, { "epoch": 2.86, "learning_rate": 2.6074166518095914e-08, "logits/chosen": -2.396233558654785, "logits/rejected": -2.23172926902771, "logps/chosen": -292.914794921875, "logps/rejected": -443.27764892578125, "loss": 0.0318, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.0942294597625732, "rewards/margins": 14.994674682617188, "rewards/rejected": -17.088903427124023, "step": 11880 }, { "epoch": 2.86, "learning_rate": 2.562845426992334e-08, "logits/chosen": -2.5167908668518066, "logits/rejected": -2.5856142044067383, "logps/chosen": -236.70388793945312, "logps/rejected": -390.9494934082031, "loss": 0.0558, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9860879778862, "rewards/margins": 10.780597686767578, "rewards/rejected": -11.766684532165527, "step": 11890 }, { "epoch": 2.86, "learning_rate": 2.518274202175076e-08, "logits/chosen": -2.3103814125061035, "logits/rejected": -2.3382656574249268, "logps/chosen": -254.10647583007812, "logps/rejected": -315.13250732421875, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -1.0061086416244507, "rewards/margins": 11.909418106079102, "rewards/rejected": -12.91552734375, "step": 11900 }, { "epoch": 2.87, "learning_rate": 2.4737029773578178e-08, "logits/chosen": -2.516348123550415, "logits/rejected": -2.570709228515625, "logps/chosen": -282.7447509765625, "logps/rejected": -372.11151123046875, "loss": 0.0349, "rewards/accuracies": 1.0, "rewards/chosen": -0.984648585319519, "rewards/margins": 11.936151504516602, "rewards/rejected": -12.920801162719727, "step": 11910 }, { "epoch": 2.87, "learning_rate": 2.4291317525405598e-08, "logits/chosen": -2.423058271408081, "logits/rejected": -2.390669584274292, "logps/chosen": -286.0511169433594, "logps/rejected": -361.89581298828125, "loss": 0.0184, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.80135178565979, "rewards/margins": 11.103158950805664, "rewards/rejected": -12.904510498046875, "step": 11920 }, { "epoch": 2.87, "learning_rate": 2.3845605277233017e-08, "logits/chosen": -2.5798749923706055, "logits/rejected": -2.433136463165283, "logps/chosen": -254.32369995117188, "logps/rejected": -274.3641357421875, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -1.7883821725845337, "rewards/margins": 9.948869705200195, "rewards/rejected": -11.737250328063965, "step": 11930 }, { "epoch": 2.87, "learning_rate": 2.339989302906044e-08, "logits/chosen": -2.6218628883361816, "logits/rejected": -2.4903016090393066, "logps/chosen": -273.2744140625, "logps/rejected": -320.1408996582031, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -0.44978299736976624, "rewards/margins": 10.955463409423828, "rewards/rejected": -11.40524673461914, "step": 11940 }, { "epoch": 2.88, "learning_rate": 2.295418078088786e-08, "logits/chosen": -2.639375925064087, "logits/rejected": -2.454336404800415, "logps/chosen": -351.46722412109375, "logps/rejected": -401.80218505859375, "loss": 0.0272, "rewards/accuracies": 1.0, "rewards/chosen": -1.7721977233886719, "rewards/margins": 12.326979637145996, "rewards/rejected": -14.099177360534668, "step": 11950 }, { "epoch": 2.88, "learning_rate": 2.250846853271528e-08, "logits/chosen": -2.226454257965088, "logits/rejected": -2.206085205078125, "logps/chosen": -327.9383239746094, "logps/rejected": -373.9483642578125, "loss": 0.0713, "rewards/accuracies": 1.0, "rewards/chosen": -0.020443439483642578, "rewards/margins": 13.046069145202637, "rewards/rejected": -13.066513061523438, "step": 11960 }, { "epoch": 2.88, "learning_rate": 2.2062756284542698e-08, "logits/chosen": -2.186215877532959, "logits/rejected": -2.3007748126983643, "logps/chosen": -390.6415710449219, "logps/rejected": -473.8223571777344, "loss": 0.036, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.41581654548645, "rewards/margins": 13.948402404785156, "rewards/rejected": -16.364215850830078, "step": 11970 }, { "epoch": 2.88, "learning_rate": 2.161704403637012e-08, "logits/chosen": -2.66757869720459, "logits/rejected": -2.659167528152466, "logps/chosen": -331.8939514160156, "logps/rejected": -423.4248046875, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": 1.1421377658843994, "rewards/margins": 15.744915962219238, "rewards/rejected": -14.602778434753418, "step": 11980 }, { "epoch": 2.89, "learning_rate": 2.117133178819754e-08, "logits/chosen": -2.4948318004608154, "logits/rejected": -2.486616611480713, "logps/chosen": -279.82806396484375, "logps/rejected": -391.32684326171875, "loss": 0.0325, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.112886905670166, "rewards/margins": 11.679197311401367, "rewards/rejected": -15.792081832885742, "step": 11990 }, { "epoch": 2.89, "learning_rate": 2.072561954002496e-08, "logits/chosen": -2.5811285972595215, "logits/rejected": -2.468341112136841, "logps/chosen": -347.5221252441406, "logps/rejected": -434.04791259765625, "loss": 0.036, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9449437260627747, "rewards/margins": 14.833131790161133, "rewards/rejected": -15.778076171875, "step": 12000 }, { "epoch": 2.89, "eval_logits/chosen": -2.2010746002197266, "eval_logits/rejected": -2.1640734672546387, "eval_logps/chosen": -280.1643371582031, "eval_logps/rejected": -312.15899658203125, "eval_loss": 0.6311665773391724, "eval_rewards/accuracies": 0.6899999976158142, "eval_rewards/chosen": -7.823678970336914, "eval_rewards/margins": 4.439140796661377, "eval_rewards/rejected": -12.262818336486816, "eval_runtime": 132.594, "eval_samples_per_second": 23.802, "eval_steps_per_second": 0.377, "step": 12000 }, { "epoch": 2.89, "learning_rate": 2.027990729185238e-08, "logits/chosen": -2.3970046043395996, "logits/rejected": -2.4178857803344727, "logps/chosen": -324.0255432128906, "logps/rejected": -445.8971252441406, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -0.6764753460884094, "rewards/margins": 11.698667526245117, "rewards/rejected": -12.375143051147461, "step": 12010 }, { "epoch": 2.89, "learning_rate": 1.9834195043679802e-08, "logits/chosen": -2.5303125381469727, "logits/rejected": -2.3805575370788574, "logps/chosen": -244.6219940185547, "logps/rejected": -384.60552978515625, "loss": 0.0426, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.833562135696411, "rewards/margins": 9.956976890563965, "rewards/rejected": -13.790539741516113, "step": 12020 }, { "epoch": 2.9, "learning_rate": 1.938848279550722e-08, "logits/chosen": -2.2676398754119873, "logits/rejected": -2.273580312728882, "logps/chosen": -226.77523803710938, "logps/rejected": -294.951416015625, "loss": 0.0195, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0574922561645508, "rewards/margins": 11.238329887390137, "rewards/rejected": -12.29582405090332, "step": 12030 }, { "epoch": 2.9, "learning_rate": 1.894277054733464e-08, "logits/chosen": -2.22725248336792, "logits/rejected": -2.2449066638946533, "logps/chosen": -380.6275329589844, "logps/rejected": -304.9622497558594, "loss": 0.0238, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.221129894256592, "rewards/margins": 9.654314041137695, "rewards/rejected": -13.875444412231445, "step": 12040 }, { "epoch": 2.9, "learning_rate": 1.849705829916206e-08, "logits/chosen": -2.4066781997680664, "logits/rejected": -2.3611502647399902, "logps/chosen": -287.4486999511719, "logps/rejected": -345.9519348144531, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -1.8492765426635742, "rewards/margins": 10.583861351013184, "rewards/rejected": -12.433137893676758, "step": 12050 }, { "epoch": 2.9, "learning_rate": 1.8051346050989483e-08, "logits/chosen": -2.475659132003784, "logits/rejected": -2.431633472442627, "logps/chosen": -297.91424560546875, "logps/rejected": -415.7301330566406, "loss": 0.0152, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.32944917678833, "rewards/margins": 11.287969589233398, "rewards/rejected": -14.61741828918457, "step": 12060 }, { "epoch": 2.9, "learning_rate": 1.7605633802816902e-08, "logits/chosen": -2.388906955718994, "logits/rejected": -2.247445821762085, "logps/chosen": -255.1102752685547, "logps/rejected": -369.0557861328125, "loss": 0.1142, "rewards/accuracies": 1.0, "rewards/chosen": -0.6246918439865112, "rewards/margins": 12.72299575805664, "rewards/rejected": -13.347684860229492, "step": 12070 }, { "epoch": 2.91, "learning_rate": 1.715992155464432e-08, "logits/chosen": -2.5152182579040527, "logits/rejected": -2.463408946990967, "logps/chosen": -324.1038513183594, "logps/rejected": -415.5223693847656, "loss": 0.0387, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.5152909755706787, "rewards/margins": 11.51399040222168, "rewards/rejected": -14.029281616210938, "step": 12080 }, { "epoch": 2.91, "learning_rate": 1.671420930647174e-08, "logits/chosen": -2.61296010017395, "logits/rejected": -2.494764566421509, "logps/chosen": -306.69537353515625, "logps/rejected": -344.83258056640625, "loss": 0.0651, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.15839901566505432, "rewards/margins": 12.144659996032715, "rewards/rejected": -12.303059577941895, "step": 12090 }, { "epoch": 2.91, "learning_rate": 1.626849705829916e-08, "logits/chosen": -2.5070996284484863, "logits/rejected": -2.388580799102783, "logps/chosen": -234.34188842773438, "logps/rejected": -412.37200927734375, "loss": 0.0235, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9789184331893921, "rewards/margins": 14.761987686157227, "rewards/rejected": -15.74090576171875, "step": 12100 }, { "epoch": 2.91, "learning_rate": 1.5822784810126583e-08, "logits/chosen": -2.5977272987365723, "logits/rejected": -2.455437421798706, "logps/chosen": -315.7597351074219, "logps/rejected": -445.12872314453125, "loss": 0.0336, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.5849735736846924, "rewards/margins": 11.080507278442383, "rewards/rejected": -13.665481567382812, "step": 12110 }, { "epoch": 2.92, "learning_rate": 1.5377072561954002e-08, "logits/chosen": -2.4510676860809326, "logits/rejected": -2.355278491973877, "logps/chosen": -229.1404266357422, "logps/rejected": -328.08831787109375, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -1.985194444656372, "rewards/margins": 11.137495040893555, "rewards/rejected": -13.122690200805664, "step": 12120 }, { "epoch": 2.92, "learning_rate": 1.4931360313781422e-08, "logits/chosen": -2.541381359100342, "logits/rejected": -2.4581973552703857, "logps/chosen": -246.9438934326172, "logps/rejected": -331.85430908203125, "loss": 0.0445, "rewards/accuracies": 1.0, "rewards/chosen": -0.5580185651779175, "rewards/margins": 10.875816345214844, "rewards/rejected": -11.433834075927734, "step": 12130 }, { "epoch": 2.92, "learning_rate": 1.4485648065608843e-08, "logits/chosen": -2.435041904449463, "logits/rejected": -2.3537468910217285, "logps/chosen": -284.8828125, "logps/rejected": -479.46588134765625, "loss": 0.0472, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.4715332984924316, "rewards/margins": 14.933647155761719, "rewards/rejected": -17.405181884765625, "step": 12140 }, { "epoch": 2.92, "learning_rate": 1.4039935817436262e-08, "logits/chosen": -2.563504695892334, "logits/rejected": -2.54644513130188, "logps/chosen": -255.3700408935547, "logps/rejected": -397.90435791015625, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": -1.8312803506851196, "rewards/margins": 12.085309028625488, "rewards/rejected": -13.916587829589844, "step": 12150 }, { "epoch": 2.93, "learning_rate": 1.3594223569263683e-08, "logits/chosen": -2.327991485595703, "logits/rejected": -2.257784366607666, "logps/chosen": -234.7561492919922, "logps/rejected": -345.07720947265625, "loss": 0.0439, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.1010355949401855, "rewards/margins": 9.344538688659668, "rewards/rejected": -12.445573806762695, "step": 12160 }, { "epoch": 2.93, "learning_rate": 1.3148511321091103e-08, "logits/chosen": -2.4475510120391846, "logits/rejected": -2.3634095191955566, "logps/chosen": -286.64276123046875, "logps/rejected": -417.7353515625, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/chosen": -1.1268631219863892, "rewards/margins": 12.717265129089355, "rewards/rejected": -13.844128608703613, "step": 12170 }, { "epoch": 2.93, "learning_rate": 1.2702799072918524e-08, "logits/chosen": -2.515963077545166, "logits/rejected": -2.4292666912078857, "logps/chosen": -294.73486328125, "logps/rejected": -405.29302978515625, "loss": 0.05, "rewards/accuracies": 1.0, "rewards/chosen": -0.17532333731651306, "rewards/margins": 13.44401741027832, "rewards/rejected": -13.619341850280762, "step": 12180 }, { "epoch": 2.93, "learning_rate": 1.2257086824745943e-08, "logits/chosen": -2.5807528495788574, "logits/rejected": -2.4983749389648438, "logps/chosen": -289.40106201171875, "logps/rejected": -375.0059509277344, "loss": 0.0257, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.073075771331787, "rewards/margins": 10.209634780883789, "rewards/rejected": -13.282710075378418, "step": 12190 }, { "epoch": 2.94, "learning_rate": 1.1811374576573364e-08, "logits/chosen": -2.569288730621338, "logits/rejected": -2.4954841136932373, "logps/chosen": -303.577880859375, "logps/rejected": -333.6105041503906, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/chosen": -1.453270673751831, "rewards/margins": 9.452234268188477, "rewards/rejected": -10.905505180358887, "step": 12200 }, { "epoch": 2.94, "learning_rate": 1.1365662328400784e-08, "logits/chosen": -2.393186569213867, "logits/rejected": -2.2872087955474854, "logps/chosen": -394.0965270996094, "logps/rejected": -376.77166748046875, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -0.8482491374015808, "rewards/margins": 12.98796272277832, "rewards/rejected": -13.836212158203125, "step": 12210 }, { "epoch": 2.94, "learning_rate": 1.0919950080228205e-08, "logits/chosen": -2.589224338531494, "logits/rejected": -2.537210702896118, "logps/chosen": -434.2206115722656, "logps/rejected": -433.95751953125, "loss": 0.0333, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0010029077529907, "rewards/margins": 11.84622859954834, "rewards/rejected": -12.8472318649292, "step": 12220 }, { "epoch": 2.94, "learning_rate": 1.0474237832055624e-08, "logits/chosen": -2.519874095916748, "logits/rejected": -2.4072184562683105, "logps/chosen": -359.74542236328125, "logps/rejected": -430.4481506347656, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": -1.849611520767212, "rewards/margins": 10.379087448120117, "rewards/rejected": -12.228699684143066, "step": 12230 }, { "epoch": 2.95, "learning_rate": 1.0028525583883044e-08, "logits/chosen": -2.4503467082977295, "logits/rejected": -2.3041646480560303, "logps/chosen": -316.7175598144531, "logps/rejected": -411.546142578125, "loss": 0.0372, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.5837459564208984, "rewards/margins": 10.363752365112305, "rewards/rejected": -11.94749927520752, "step": 12240 }, { "epoch": 2.95, "learning_rate": 9.582813335710465e-09, "logits/chosen": -2.604128360748291, "logits/rejected": -2.4129440784454346, "logps/chosen": -303.106689453125, "logps/rejected": -401.07403564453125, "loss": 0.0283, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.1794488430023193, "rewards/margins": 10.987741470336914, "rewards/rejected": -13.167190551757812, "step": 12250 }, { "epoch": 2.95, "learning_rate": 9.137101087537884e-09, "logits/chosen": -2.621835708618164, "logits/rejected": -2.611644744873047, "logps/chosen": -312.47283935546875, "logps/rejected": -420.8551330566406, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -0.31393319368362427, "rewards/margins": 12.102607727050781, "rewards/rejected": -12.41654109954834, "step": 12260 }, { "epoch": 2.95, "learning_rate": 8.691388839365305e-09, "logits/chosen": -2.4669082164764404, "logits/rejected": -2.4551076889038086, "logps/chosen": -227.9279022216797, "logps/rejected": -394.44915771484375, "loss": 0.039, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.9205996990203857, "rewards/margins": 16.079343795776367, "rewards/rejected": -18.999942779541016, "step": 12270 }, { "epoch": 2.96, "learning_rate": 8.245676591192724e-09, "logits/chosen": -2.5060513019561768, "logits/rejected": -2.425774097442627, "logps/chosen": -337.9098205566406, "logps/rejected": -466.40045166015625, "loss": 0.0337, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.6899657249450684, "rewards/margins": 12.175222396850586, "rewards/rejected": -14.865188598632812, "step": 12280 }, { "epoch": 2.96, "learning_rate": 7.799964343020146e-09, "logits/chosen": -2.4625678062438965, "logits/rejected": -2.3441214561462402, "logps/chosen": -154.75999450683594, "logps/rejected": -238.6104736328125, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": -0.8786206245422363, "rewards/margins": 9.321663856506348, "rewards/rejected": -10.200284004211426, "step": 12290 }, { "epoch": 2.96, "learning_rate": 7.3542520948475666e-09, "logits/chosen": -2.6390433311462402, "logits/rejected": -2.4912033081054688, "logps/chosen": -364.30194091796875, "logps/rejected": -403.852783203125, "loss": 0.0498, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.983339309692383, "rewards/margins": 11.095958709716797, "rewards/rejected": -14.079297065734863, "step": 12300 }, { "epoch": 2.96, "learning_rate": 6.908539846674986e-09, "logits/chosen": -2.519421100616455, "logits/rejected": -2.306983709335327, "logps/chosen": -333.4102478027344, "logps/rejected": -354.1184997558594, "loss": 0.0216, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.2175986766815186, "rewards/margins": 10.80386734008789, "rewards/rejected": -14.021466255187988, "step": 12310 }, { "epoch": 2.97, "learning_rate": 6.462827598502406e-09, "logits/chosen": -2.42765474319458, "logits/rejected": -2.3466317653656006, "logps/chosen": -230.0916290283203, "logps/rejected": -323.4930419921875, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -1.2475663423538208, "rewards/margins": 11.202420234680176, "rewards/rejected": -12.449986457824707, "step": 12320 }, { "epoch": 2.97, "learning_rate": 6.0171153503298264e-09, "logits/chosen": -2.4995667934417725, "logits/rejected": -2.4612293243408203, "logps/chosen": -304.5595703125, "logps/rejected": -420.3321838378906, "loss": 0.0387, "rewards/accuracies": 1.0, "rewards/chosen": -0.5263036489486694, "rewards/margins": 12.25594711303711, "rewards/rejected": -12.782249450683594, "step": 12330 }, { "epoch": 2.97, "learning_rate": 5.571403102157247e-09, "logits/chosen": -2.5020134449005127, "logits/rejected": -2.490213632583618, "logps/chosen": -318.671630859375, "logps/rejected": -407.4111022949219, "loss": 0.0323, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3810112476348877, "rewards/margins": 12.061712265014648, "rewards/rejected": -13.442724227905273, "step": 12340 }, { "epoch": 2.97, "learning_rate": 5.125690853984667e-09, "logits/chosen": -2.4014594554901123, "logits/rejected": -2.4098572731018066, "logps/chosen": -284.7099304199219, "logps/rejected": -533.87646484375, "loss": 0.0366, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.165526032447815, "rewards/margins": 17.97944450378418, "rewards/rejected": -16.813919067382812, "step": 12350 }, { "epoch": 2.97, "learning_rate": 4.679978605812087e-09, "logits/chosen": -2.448012351989746, "logits/rejected": -2.5330610275268555, "logps/chosen": -197.67715454101562, "logps/rejected": -392.29766845703125, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -0.932754635810852, "rewards/margins": 11.501636505126953, "rewards/rejected": -12.434389114379883, "step": 12360 }, { "epoch": 2.98, "learning_rate": 4.234266357639507e-09, "logits/chosen": -2.4347705841064453, "logits/rejected": -2.4334542751312256, "logps/chosen": -273.04345703125, "logps/rejected": -339.4583740234375, "loss": 0.0277, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6104110479354858, "rewards/margins": 12.005154609680176, "rewards/rejected": -13.615565299987793, "step": 12370 }, { "epoch": 2.98, "learning_rate": 3.788554109466928e-09, "logits/chosen": -2.3450114727020264, "logits/rejected": -2.1836085319519043, "logps/chosen": -335.5284423828125, "logps/rejected": -349.3228759765625, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": -0.5563626885414124, "rewards/margins": 13.879631042480469, "rewards/rejected": -14.435995101928711, "step": 12380 }, { "epoch": 2.98, "learning_rate": 3.3428418612943483e-09, "logits/chosen": -2.418477773666382, "logits/rejected": -2.4164748191833496, "logps/chosen": -214.49325561523438, "logps/rejected": -500.2579650878906, "loss": 0.0303, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.8474563360214233, "rewards/margins": 18.810083389282227, "rewards/rejected": -16.962627410888672, "step": 12390 }, { "epoch": 2.98, "learning_rate": 2.8971296131217685e-09, "logits/chosen": -2.357849597930908, "logits/rejected": -2.2878222465515137, "logps/chosen": -339.95703125, "logps/rejected": -390.0880432128906, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -2.0557844638824463, "rewards/margins": 10.755159378051758, "rewards/rejected": -12.810943603515625, "step": 12400 }, { "epoch": 2.98, "eval_logits/chosen": -2.198592185974121, "eval_logits/rejected": -2.1612653732299805, "eval_logps/chosen": -278.6061096191406, "eval_logps/rejected": -310.4496154785156, "eval_loss": 0.6282873749732971, "eval_rewards/accuracies": 0.6899999976158142, "eval_rewards/chosen": -7.6678571701049805, "eval_rewards/margins": 4.4240264892578125, "eval_rewards/rejected": -12.091883659362793, "eval_runtime": 132.1622, "eval_samples_per_second": 23.88, "eval_steps_per_second": 0.378, "step": 12400 }, { "epoch": 2.99, "learning_rate": 2.4514173649491887e-09, "logits/chosen": -2.455528736114502, "logits/rejected": -2.414548635482788, "logps/chosen": -347.48919677734375, "logps/rejected": -353.279296875, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -1.921791434288025, "rewards/margins": 10.95647144317627, "rewards/rejected": -12.878263473510742, "step": 12410 }, { "epoch": 2.99, "learning_rate": 2.005705116776609e-09, "logits/chosen": -2.5131871700286865, "logits/rejected": -2.3196463584899902, "logps/chosen": -286.4059143066406, "logps/rejected": -333.23028564453125, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/chosen": -0.35814499855041504, "rewards/margins": 11.833600044250488, "rewards/rejected": -12.191744804382324, "step": 12420 }, { "epoch": 2.99, "learning_rate": 1.5599928686040292e-09, "logits/chosen": -2.3323538303375244, "logits/rejected": -2.097768783569336, "logps/chosen": -366.6454772949219, "logps/rejected": -362.915771484375, "loss": 0.0224, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.7946733236312866, "rewards/margins": 11.0028657913208, "rewards/rejected": -12.797537803649902, "step": 12430 }, { "epoch": 2.99, "learning_rate": 1.1142806204314494e-09, "logits/chosen": -2.3995718955993652, "logits/rejected": -2.456651210784912, "logps/chosen": -319.78326416015625, "logps/rejected": -416.267822265625, "loss": 0.0376, "rewards/accuracies": 1.0, "rewards/chosen": -1.8602653741836548, "rewards/margins": 9.876348495483398, "rewards/rejected": -11.736612319946289, "step": 12440 }, { "epoch": 3.0, "learning_rate": 6.685683722588697e-10, "logits/chosen": -2.534796953201294, "logits/rejected": -2.4103546142578125, "logps/chosen": -353.41168212890625, "logps/rejected": -391.51812744140625, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -0.9421192407608032, "rewards/margins": 14.015222549438477, "rewards/rejected": -14.957341194152832, "step": 12450 }, { "epoch": 3.0, "learning_rate": 2.2285612408628988e-10, "logits/chosen": -2.50495982170105, "logits/rejected": -2.418968915939331, "logps/chosen": -335.51995849609375, "logps/rejected": -341.50604248046875, "loss": 0.0187, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3293344974517822, "rewards/margins": 12.255937576293945, "rewards/rejected": -13.585271835327148, "step": 12460 }, { "epoch": 3.0, "step": 12465, "total_flos": 0.0, "train_loss": 0.24049862180692672, "train_runtime": 20948.1804, "train_samples_per_second": 9.519, "train_steps_per_second": 0.595 } ], "logging_steps": 10, "max_steps": 12465, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1247, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }