{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 5811, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 8.591065292096219e-10, "logits/chosen": -2.764016628265381, "logits/rejected": -2.674347400665283, "logps/chosen": -108.92428588867188, "logps/rejected": -112.40267944335938, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 8.59106529209622e-09, "logits/chosen": -2.863027811050415, "logits/rejected": -2.838684320449829, "logps/chosen": -327.73529052734375, "logps/rejected": -250.00613403320312, "loss": 0.6943, "rewards/accuracies": 0.5277777910232544, "rewards/chosen": -0.003003156976774335, "rewards/margins": 0.0038251648657023907, "rewards/rejected": -0.006828321143984795, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.718213058419244e-08, "logits/chosen": -2.7698731422424316, "logits/rejected": -2.747614622116089, "logps/chosen": -251.3875274658203, "logps/rejected": -184.10693359375, "loss": 0.6945, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.0009023567545227706, "rewards/margins": -0.0005918591632507741, "rewards/rejected": -0.0003104977367911488, "step": 20 }, { "epoch": 0.02, "learning_rate": 2.5773195876288656e-08, "logits/chosen": -2.8563497066497803, "logits/rejected": -2.8292155265808105, "logps/chosen": -318.81866455078125, "logps/rejected": -268.5263977050781, "loss": 0.691, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.013460609130561352, "rewards/margins": 0.014691811986267567, "rewards/rejected": -0.0012312005273997784, "step": 30 }, { "epoch": 0.02, "learning_rate": 3.436426116838488e-08, "logits/chosen": -2.7859857082366943, "logits/rejected": -2.753185749053955, "logps/chosen": -324.4197692871094, "logps/rejected": -235.1029510498047, "loss": 0.6847, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.004841436631977558, "rewards/margins": 0.015062311664223671, "rewards/rejected": -0.010220875963568687, "step": 40 }, { "epoch": 0.03, "learning_rate": 4.29553264604811e-08, "logits/chosen": -2.918607473373413, "logits/rejected": -2.89624285697937, "logps/chosen": -264.66351318359375, "logps/rejected": -208.9918212890625, "loss": 0.6823, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.01985430344939232, "rewards/margins": 0.03327787667512894, "rewards/rejected": -0.013423572294414043, "step": 50 }, { "epoch": 0.03, "learning_rate": 5.154639175257731e-08, "logits/chosen": -2.7966980934143066, "logits/rejected": -2.808544874191284, "logps/chosen": -266.514404296875, "logps/rejected": -252.36001586914062, "loss": 0.6705, "rewards/accuracies": 0.625, "rewards/chosen": 0.018798351287841797, "rewards/margins": 0.05238068103790283, "rewards/rejected": -0.03358232229948044, "step": 60 }, { "epoch": 0.04, "learning_rate": 6.013745704467354e-08, "logits/chosen": -2.8935234546661377, "logits/rejected": -2.854685068130493, "logps/chosen": -312.78436279296875, "logps/rejected": -256.33526611328125, "loss": 0.6655, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03346724063158035, "rewards/margins": 0.07332305610179901, "rewards/rejected": -0.03985581547021866, "step": 70 }, { "epoch": 0.04, "learning_rate": 6.872852233676976e-08, "logits/chosen": -2.8555781841278076, "logits/rejected": -2.838266372680664, "logps/chosen": -291.6562805175781, "logps/rejected": -253.54049682617188, "loss": 0.6428, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.05274033546447754, "rewards/margins": 0.10176833719015121, "rewards/rejected": -0.04902799427509308, "step": 80 }, { "epoch": 0.05, "learning_rate": 7.731958762886598e-08, "logits/chosen": -2.8810267448425293, "logits/rejected": -2.8738858699798584, "logps/chosen": -312.50506591796875, "logps/rejected": -256.6100158691406, "loss": 0.6394, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.032647717744112015, "rewards/margins": 0.1116027683019638, "rewards/rejected": -0.07895506173372269, "step": 90 }, { "epoch": 0.05, "learning_rate": 8.59106529209622e-08, "logits/chosen": -2.717987537384033, "logits/rejected": -2.7490057945251465, "logps/chosen": -275.51495361328125, "logps/rejected": -199.8630828857422, "loss": 0.6299, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.03528403118252754, "rewards/margins": 0.14646485447883606, "rewards/rejected": -0.11118084192276001, "step": 100 }, { "epoch": 0.06, "learning_rate": 9.450171821305841e-08, "logits/chosen": -2.7796645164489746, "logits/rejected": -2.830543041229248, "logps/chosen": -273.095703125, "logps/rejected": -247.6790313720703, "loss": 0.6182, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.0012518130242824554, "rewards/margins": 0.21374264359474182, "rewards/rejected": -0.21249084174633026, "step": 110 }, { "epoch": 0.06, "learning_rate": 1.0309278350515462e-07, "logits/chosen": -2.800854206085205, "logits/rejected": -2.893012523651123, "logps/chosen": -235.9143829345703, "logps/rejected": -214.0904998779297, "loss": 0.5884, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.017621120437979698, "rewards/margins": 0.21942290663719177, "rewards/rejected": -0.20180177688598633, "step": 120 }, { "epoch": 0.07, "learning_rate": 1.1168384879725086e-07, "logits/chosen": -2.9455106258392334, "logits/rejected": -2.866570472717285, "logps/chosen": -317.5361022949219, "logps/rejected": -241.4765167236328, "loss": 0.5885, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.15108627080917358, "rewards/margins": 0.3808462917804718, "rewards/rejected": -0.22976000607013702, "step": 130 }, { "epoch": 0.07, "learning_rate": 1.202749140893471e-07, "logits/chosen": -2.828317403793335, "logits/rejected": -2.8364932537078857, "logps/chosen": -298.90313720703125, "logps/rejected": -277.0294189453125, "loss": 0.5635, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.043794650584459305, "rewards/margins": 0.49751076102256775, "rewards/rejected": -0.45371612906455994, "step": 140 }, { "epoch": 0.08, "learning_rate": 1.2886597938144328e-07, "logits/chosen": -2.898667335510254, "logits/rejected": -2.882783889770508, "logps/chosen": -291.00213623046875, "logps/rejected": -247.44692993164062, "loss": 0.5565, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.1469222903251648, "rewards/margins": 0.6005532145500183, "rewards/rejected": -0.4536309242248535, "step": 150 }, { "epoch": 0.08, "learning_rate": 1.3745704467353952e-07, "logits/chosen": -2.813276767730713, "logits/rejected": -2.8065598011016846, "logps/chosen": -301.55999755859375, "logps/rejected": -246.9912567138672, "loss": 0.538, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.14675188064575195, "rewards/margins": 0.5681458115577698, "rewards/rejected": -0.4213939309120178, "step": 160 }, { "epoch": 0.09, "learning_rate": 1.4604810996563573e-07, "logits/chosen": -2.8698763847351074, "logits/rejected": -2.8171753883361816, "logps/chosen": -285.3466796875, "logps/rejected": -235.87338256835938, "loss": 0.4963, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.16148421168327332, "rewards/margins": 0.8422476053237915, "rewards/rejected": -0.6807633638381958, "step": 170 }, { "epoch": 0.09, "learning_rate": 1.5463917525773197e-07, "logits/chosen": -2.8068044185638428, "logits/rejected": -2.8266568183898926, "logps/chosen": -237.0592803955078, "logps/rejected": -211.4211883544922, "loss": 0.5179, "rewards/accuracies": 0.75, "rewards/chosen": 0.04243529960513115, "rewards/margins": 0.6374794840812683, "rewards/rejected": -0.5950442552566528, "step": 180 }, { "epoch": 0.1, "learning_rate": 1.6323024054982818e-07, "logits/chosen": -2.780351161956787, "logits/rejected": -2.834380626678467, "logps/chosen": -267.52972412109375, "logps/rejected": -268.3015441894531, "loss": 0.5784, "rewards/accuracies": 0.6875, "rewards/chosen": 0.0268848929554224, "rewards/margins": 0.57568359375, "rewards/rejected": -0.5487987399101257, "step": 190 }, { "epoch": 0.1, "learning_rate": 1.718213058419244e-07, "logits/chosen": -2.724424123764038, "logits/rejected": -2.759570598602295, "logps/chosen": -283.513427734375, "logps/rejected": -192.94766235351562, "loss": 0.4916, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.18617114424705505, "rewards/margins": 0.8166147470474243, "rewards/rejected": -0.6304435133934021, "step": 200 }, { "epoch": 0.11, "learning_rate": 1.804123711340206e-07, "logits/chosen": -2.766322612762451, "logits/rejected": -2.6512322425842285, "logps/chosen": -264.46697998046875, "logps/rejected": -257.443603515625, "loss": 0.5674, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.12685665488243103, "rewards/margins": 0.5820873975753784, "rewards/rejected": -0.7089440822601318, "step": 210 }, { "epoch": 0.11, "learning_rate": 1.8900343642611682e-07, "logits/chosen": -2.8477821350097656, "logits/rejected": -2.830031394958496, "logps/chosen": -287.61212158203125, "logps/rejected": -240.40194702148438, "loss": 0.5498, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.06794139742851257, "rewards/margins": 0.7129371166229248, "rewards/rejected": -0.780878484249115, "step": 220 }, { "epoch": 0.12, "learning_rate": 1.9759450171821303e-07, "logits/chosen": -2.856313943862915, "logits/rejected": -2.8552908897399902, "logps/chosen": -293.28033447265625, "logps/rejected": -254.1317138671875, "loss": 0.5135, "rewards/accuracies": 0.75, "rewards/chosen": -0.050720613449811935, "rewards/margins": 0.640368640422821, "rewards/rejected": -0.6910892724990845, "step": 230 }, { "epoch": 0.12, "learning_rate": 2.0618556701030925e-07, "logits/chosen": -2.8190102577209473, "logits/rejected": -2.8251214027404785, "logps/chosen": -336.1227111816406, "logps/rejected": -227.45346069335938, "loss": 0.5406, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09252417087554932, "rewards/margins": 0.6168856024742126, "rewards/rejected": -0.709409773349762, "step": 240 }, { "epoch": 0.13, "learning_rate": 2.1477663230240549e-07, "logits/chosen": -2.9281716346740723, "logits/rejected": -2.908295154571533, "logps/chosen": -273.67584228515625, "logps/rejected": -254.62588500976562, "loss": 0.5117, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.07370147854089737, "rewards/margins": 0.9147092700004578, "rewards/rejected": -0.8410077095031738, "step": 250 }, { "epoch": 0.13, "learning_rate": 2.2336769759450173e-07, "logits/chosen": -2.911724805831909, "logits/rejected": -2.909519672393799, "logps/chosen": -300.3340759277344, "logps/rejected": -246.3933868408203, "loss": 0.5363, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.15076453983783722, "rewards/margins": 0.8897884488105774, "rewards/rejected": -0.7390238642692566, "step": 260 }, { "epoch": 0.14, "learning_rate": 2.3195876288659794e-07, "logits/chosen": -2.8644649982452393, "logits/rejected": -2.8738067150115967, "logps/chosen": -278.7558898925781, "logps/rejected": -239.0220489501953, "loss": 0.4945, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.01367796678096056, "rewards/margins": 0.7261922955513, "rewards/rejected": -0.7125142812728882, "step": 270 }, { "epoch": 0.14, "learning_rate": 2.405498281786942e-07, "logits/chosen": -2.877744674682617, "logits/rejected": -2.819153070449829, "logps/chosen": -313.3977966308594, "logps/rejected": -251.2105255126953, "loss": 0.5324, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.08043201267719269, "rewards/margins": 0.8994860649108887, "rewards/rejected": -0.8190540075302124, "step": 280 }, { "epoch": 0.15, "learning_rate": 2.4914089347079036e-07, "logits/chosen": -2.7928099632263184, "logits/rejected": -2.8494484424591064, "logps/chosen": -302.0984802246094, "logps/rejected": -284.1147155761719, "loss": 0.5093, "rewards/accuracies": 0.75, "rewards/chosen": 0.3942905068397522, "rewards/margins": 1.0846911668777466, "rewards/rejected": -0.6904006004333496, "step": 290 }, { "epoch": 0.15, "learning_rate": 2.5773195876288655e-07, "logits/chosen": -2.839244842529297, "logits/rejected": -2.8479905128479004, "logps/chosen": -262.75225830078125, "logps/rejected": -247.9908447265625, "loss": 0.5291, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.08927704393863678, "rewards/margins": 0.8800823092460632, "rewards/rejected": -0.7908053398132324, "step": 300 }, { "epoch": 0.16, "learning_rate": 2.663230240549828e-07, "logits/chosen": -2.8887572288513184, "logits/rejected": -2.907589912414551, "logps/chosen": -272.9616394042969, "logps/rejected": -226.31192016601562, "loss": 0.5171, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.0031876713037490845, "rewards/margins": 0.7666963338851929, "rewards/rejected": -0.7635086178779602, "step": 310 }, { "epoch": 0.17, "learning_rate": 2.7491408934707903e-07, "logits/chosen": -2.8778905868530273, "logits/rejected": -2.9233052730560303, "logps/chosen": -297.2710876464844, "logps/rejected": -233.50241088867188, "loss": 0.4689, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.038285039365291595, "rewards/margins": 1.1589080095291138, "rewards/rejected": -1.120622992515564, "step": 320 }, { "epoch": 0.17, "learning_rate": 2.835051546391752e-07, "logits/chosen": -2.852926254272461, "logits/rejected": -2.9000022411346436, "logps/chosen": -303.80938720703125, "logps/rejected": -248.91494750976562, "loss": 0.4865, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.016001278534531593, "rewards/margins": 1.0812021493911743, "rewards/rejected": -1.0972034931182861, "step": 330 }, { "epoch": 0.18, "learning_rate": 2.9209621993127146e-07, "logits/chosen": -2.8354809284210205, "logits/rejected": -2.8505842685699463, "logps/chosen": -289.55194091796875, "logps/rejected": -248.23849487304688, "loss": 0.5226, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.08476074039936066, "rewards/margins": 0.8404847979545593, "rewards/rejected": -0.7557238936424255, "step": 340 }, { "epoch": 0.18, "learning_rate": 3.006872852233677e-07, "logits/chosen": -2.8878281116485596, "logits/rejected": -2.8713698387145996, "logps/chosen": -234.1008758544922, "logps/rejected": -228.55288696289062, "loss": 0.4795, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06374388188123703, "rewards/margins": 1.0862773656845093, "rewards/rejected": -1.1500213146209717, "step": 350 }, { "epoch": 0.19, "learning_rate": 3.0927835051546394e-07, "logits/chosen": -2.8875787258148193, "logits/rejected": -2.853342056274414, "logps/chosen": -266.7445373535156, "logps/rejected": -220.08349609375, "loss": 0.4635, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.18792326748371124, "rewards/margins": 1.1800565719604492, "rewards/rejected": -0.9921333193778992, "step": 360 }, { "epoch": 0.19, "learning_rate": 3.178694158075601e-07, "logits/chosen": -2.8684146404266357, "logits/rejected": -2.859692096710205, "logps/chosen": -254.9929656982422, "logps/rejected": -205.95156860351562, "loss": 0.4846, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.16485366225242615, "rewards/margins": 1.232887864112854, "rewards/rejected": -1.068034052848816, "step": 370 }, { "epoch": 0.2, "learning_rate": 3.2646048109965636e-07, "logits/chosen": -2.8387057781219482, "logits/rejected": -2.7803750038146973, "logps/chosen": -242.21554565429688, "logps/rejected": -221.7504119873047, "loss": 0.566, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.01200074888765812, "rewards/margins": 0.9040949940681458, "rewards/rejected": -0.9160957336425781, "step": 380 }, { "epoch": 0.2, "learning_rate": 3.3505154639175255e-07, "logits/chosen": -2.893998146057129, "logits/rejected": -2.8745360374450684, "logps/chosen": -257.35675048828125, "logps/rejected": -215.04263305664062, "loss": 0.49, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.014016789384186268, "rewards/margins": 1.0278851985931396, "rewards/rejected": -1.0138683319091797, "step": 390 }, { "epoch": 0.21, "learning_rate": 3.436426116838488e-07, "logits/chosen": -2.944995880126953, "logits/rejected": -2.921973705291748, "logps/chosen": -255.16989135742188, "logps/rejected": -192.85818481445312, "loss": 0.5511, "rewards/accuracies": 0.6875, "rewards/chosen": -0.16071584820747375, "rewards/margins": 0.8611815571784973, "rewards/rejected": -1.0218971967697144, "step": 400 }, { "epoch": 0.21, "learning_rate": 3.5223367697594503e-07, "logits/chosen": -2.824647903442383, "logits/rejected": -2.78361439704895, "logps/chosen": -311.44757080078125, "logps/rejected": -225.4349822998047, "loss": 0.4273, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.0005831479793414474, "rewards/margins": 1.1429133415222168, "rewards/rejected": -1.1434962749481201, "step": 410 }, { "epoch": 0.22, "learning_rate": 3.608247422680412e-07, "logits/chosen": -2.7857377529144287, "logits/rejected": -2.771965980529785, "logps/chosen": -284.3433837890625, "logps/rejected": -241.23867797851562, "loss": 0.5274, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.19046534597873688, "rewards/margins": 1.2829582691192627, "rewards/rejected": -1.4734236001968384, "step": 420 }, { "epoch": 0.22, "learning_rate": 3.6941580756013745e-07, "logits/chosen": -2.8471944332122803, "logits/rejected": -2.8916735649108887, "logps/chosen": -243.7864532470703, "logps/rejected": -219.95291137695312, "loss": 0.5155, "rewards/accuracies": 0.75, "rewards/chosen": -0.4037954807281494, "rewards/margins": 1.2392082214355469, "rewards/rejected": -1.6430038213729858, "step": 430 }, { "epoch": 0.23, "learning_rate": 3.7800687285223364e-07, "logits/chosen": -2.827336072921753, "logits/rejected": -2.7996811866760254, "logps/chosen": -297.21405029296875, "logps/rejected": -289.5452880859375, "loss": 0.5969, "rewards/accuracies": 0.75, "rewards/chosen": -0.2905265688896179, "rewards/margins": 0.8604179620742798, "rewards/rejected": -1.150944471359253, "step": 440 }, { "epoch": 0.23, "learning_rate": 3.865979381443299e-07, "logits/chosen": -2.8593180179595947, "logits/rejected": -2.851736545562744, "logps/chosen": -268.6583557128906, "logps/rejected": -254.41995239257812, "loss": 0.5034, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.054434485733509064, "rewards/margins": 0.8534797430038452, "rewards/rejected": -0.9079142808914185, "step": 450 }, { "epoch": 0.24, "learning_rate": 3.9518900343642607e-07, "logits/chosen": -2.836496114730835, "logits/rejected": -2.8268418312072754, "logps/chosen": -273.56561279296875, "logps/rejected": -258.1180419921875, "loss": 0.594, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.26750993728637695, "rewards/margins": 1.0689507722854614, "rewards/rejected": -1.336460828781128, "step": 460 }, { "epoch": 0.24, "learning_rate": 4.037800687285223e-07, "logits/chosen": -2.9143097400665283, "logits/rejected": -2.8867809772491455, "logps/chosen": -320.6771240234375, "logps/rejected": -207.6370086669922, "loss": 0.4564, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2321922332048416, "rewards/margins": 1.1562436819076538, "rewards/rejected": -1.388435959815979, "step": 470 }, { "epoch": 0.25, "learning_rate": 4.123711340206185e-07, "logits/chosen": -2.939049243927002, "logits/rejected": -2.9370107650756836, "logps/chosen": -288.6817932128906, "logps/rejected": -250.44735717773438, "loss": 0.4858, "rewards/accuracies": 0.75, "rewards/chosen": -0.09014900773763657, "rewards/margins": 1.0462675094604492, "rewards/rejected": -1.1364164352416992, "step": 480 }, { "epoch": 0.25, "learning_rate": 4.209621993127148e-07, "logits/chosen": -2.859022855758667, "logits/rejected": -2.8309292793273926, "logps/chosen": -271.74542236328125, "logps/rejected": -241.3363494873047, "loss": 0.4504, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.31187617778778076, "rewards/margins": 1.1031101942062378, "rewards/rejected": -1.4149863719940186, "step": 490 }, { "epoch": 0.26, "learning_rate": 4.2955326460481097e-07, "logits/chosen": -2.998340129852295, "logits/rejected": -3.001023292541504, "logps/chosen": -274.80035400390625, "logps/rejected": -258.14520263671875, "loss": 0.533, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.30341288447380066, "rewards/margins": 0.8886991739273071, "rewards/rejected": -1.1921122074127197, "step": 500 }, { "epoch": 0.26, "eval_logits/chosen": -2.948715925216675, "eval_logits/rejected": -2.9319217205047607, "eval_logps/chosen": -277.62506103515625, "eval_logps/rejected": -246.04127502441406, "eval_loss": 0.508408784866333, "eval_rewards/accuracies": 0.777999997138977, "eval_rewards/chosen": -0.19016194343566895, "eval_rewards/margins": 1.1778383255004883, "eval_rewards/rejected": -1.3680005073547363, "eval_runtime": 452.2133, "eval_samples_per_second": 4.423, "eval_steps_per_second": 0.276, "step": 500 }, { "epoch": 0.26, "learning_rate": 4.381443298969072e-07, "logits/chosen": -2.897766351699829, "logits/rejected": -2.8928945064544678, "logps/chosen": -295.4362487792969, "logps/rejected": -250.251953125, "loss": 0.5221, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.33991071581840515, "rewards/margins": 0.7788680195808411, "rewards/rejected": -1.1187787055969238, "step": 510 }, { "epoch": 0.27, "learning_rate": 4.4673539518900345e-07, "logits/chosen": -2.905595064163208, "logits/rejected": -2.915565252304077, "logps/chosen": -252.61865234375, "logps/rejected": -219.57925415039062, "loss": 0.5462, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.25931137800216675, "rewards/margins": 1.3003042936325073, "rewards/rejected": -1.5596157312393188, "step": 520 }, { "epoch": 0.27, "learning_rate": 4.5532646048109964e-07, "logits/chosen": -2.8665707111358643, "logits/rejected": -2.8539767265319824, "logps/chosen": -280.4945373535156, "logps/rejected": -233.099365234375, "loss": 0.5134, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2802669405937195, "rewards/margins": 0.9665705561637878, "rewards/rejected": -1.2468374967575073, "step": 530 }, { "epoch": 0.28, "learning_rate": 4.639175257731959e-07, "logits/chosen": -2.88356614112854, "logits/rejected": -2.887423276901245, "logps/chosen": -286.5166931152344, "logps/rejected": -255.9084930419922, "loss": 0.601, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5912821888923645, "rewards/margins": 0.7030351758003235, "rewards/rejected": -1.2943174839019775, "step": 540 }, { "epoch": 0.28, "learning_rate": 4.7250859106529206e-07, "logits/chosen": -2.884530544281006, "logits/rejected": -2.823495626449585, "logps/chosen": -270.32037353515625, "logps/rejected": -246.2440185546875, "loss": 0.5912, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.40074628591537476, "rewards/margins": 1.3706839084625244, "rewards/rejected": -1.771430253982544, "step": 550 }, { "epoch": 0.29, "learning_rate": 4.810996563573884e-07, "logits/chosen": -2.8146512508392334, "logits/rejected": -2.834014415740967, "logps/chosen": -312.0709533691406, "logps/rejected": -268.26416015625, "loss": 0.5438, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.3308074474334717, "rewards/margins": 1.189579963684082, "rewards/rejected": -1.5203872919082642, "step": 560 }, { "epoch": 0.29, "learning_rate": 4.896907216494845e-07, "logits/chosen": -2.852008819580078, "logits/rejected": -2.841087818145752, "logps/chosen": -278.21600341796875, "logps/rejected": -263.9369201660156, "loss": 0.5615, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5327223539352417, "rewards/margins": 0.9639410972595215, "rewards/rejected": -1.4966634511947632, "step": 570 }, { "epoch": 0.3, "learning_rate": 4.982817869415807e-07, "logits/chosen": -2.827453136444092, "logits/rejected": -2.918525457382202, "logps/chosen": -274.576904296875, "logps/rejected": -215.708251953125, "loss": 0.5615, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.22379016876220703, "rewards/margins": 1.179864525794983, "rewards/rejected": -1.40365469455719, "step": 580 }, { "epoch": 0.3, "learning_rate": 4.992350353796136e-07, "logits/chosen": -2.834676504135132, "logits/rejected": -2.772709369659424, "logps/chosen": -248.90567016601562, "logps/rejected": -249.8413543701172, "loss": 0.4855, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.11263740062713623, "rewards/margins": 1.379455327987671, "rewards/rejected": -1.4920928478240967, "step": 590 }, { "epoch": 0.31, "learning_rate": 4.982788296041308e-07, "logits/chosen": -2.8848588466644287, "logits/rejected": -2.909480094909668, "logps/chosen": -250.5892791748047, "logps/rejected": -227.27001953125, "loss": 0.6065, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.33602461218833923, "rewards/margins": 1.2359898090362549, "rewards/rejected": -1.572014570236206, "step": 600 }, { "epoch": 0.31, "learning_rate": 4.973226238286479e-07, "logits/chosen": -2.855553150177002, "logits/rejected": -2.922438144683838, "logps/chosen": -331.48419189453125, "logps/rejected": -273.5794982910156, "loss": 0.5389, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.20098695158958435, "rewards/margins": 1.2247838973999023, "rewards/rejected": -1.4257709980010986, "step": 610 }, { "epoch": 0.32, "learning_rate": 4.96366418053165e-07, "logits/chosen": -2.958442211151123, "logits/rejected": -2.9399642944335938, "logps/chosen": -284.391845703125, "logps/rejected": -267.7436218261719, "loss": 0.5924, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.27081987261772156, "rewards/margins": 0.6422858238220215, "rewards/rejected": -0.9131056666374207, "step": 620 }, { "epoch": 0.33, "learning_rate": 4.954102122776821e-07, "logits/chosen": -2.942708969116211, "logits/rejected": -2.9726662635803223, "logps/chosen": -255.775634765625, "logps/rejected": -202.54544067382812, "loss": 0.5454, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2214779406785965, "rewards/margins": 1.1896545886993408, "rewards/rejected": -1.411132574081421, "step": 630 }, { "epoch": 0.33, "learning_rate": 4.944540065021993e-07, "logits/chosen": -2.7782981395721436, "logits/rejected": -2.740140199661255, "logps/chosen": -243.0159149169922, "logps/rejected": -211.75009155273438, "loss": 0.5355, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.5337392091751099, "rewards/margins": 1.1862469911575317, "rewards/rejected": -1.7199862003326416, "step": 640 }, { "epoch": 0.34, "learning_rate": 4.934978007267163e-07, "logits/chosen": -2.857133626937866, "logits/rejected": -2.9078116416931152, "logps/chosen": -285.61309814453125, "logps/rejected": -253.51596069335938, "loss": 0.5293, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08902885764837265, "rewards/margins": 1.221697449684143, "rewards/rejected": -1.3107261657714844, "step": 650 }, { "epoch": 0.34, "learning_rate": 4.925415949512335e-07, "logits/chosen": -2.792689800262451, "logits/rejected": -2.7753195762634277, "logps/chosen": -336.81976318359375, "logps/rejected": -265.2138366699219, "loss": 0.5447, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.26736006140708923, "rewards/margins": 1.6899057626724243, "rewards/rejected": -1.957265853881836, "step": 660 }, { "epoch": 0.35, "learning_rate": 4.915853891757506e-07, "logits/chosen": -2.6831443309783936, "logits/rejected": -2.6355414390563965, "logps/chosen": -199.3243865966797, "logps/rejected": -248.56185913085938, "loss": 0.6114, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.28927290439605713, "rewards/margins": 0.5596956014633179, "rewards/rejected": -0.848968505859375, "step": 670 }, { "epoch": 0.35, "learning_rate": 4.906291834002677e-07, "logits/chosen": -2.7662174701690674, "logits/rejected": -2.7773642539978027, "logps/chosen": -287.2375183105469, "logps/rejected": -260.99151611328125, "loss": 0.4939, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.39218276739120483, "rewards/margins": 1.1262353658676147, "rewards/rejected": -1.5184181928634644, "step": 680 }, { "epoch": 0.36, "learning_rate": 4.896729776247848e-07, "logits/chosen": -2.8319334983825684, "logits/rejected": -2.820892333984375, "logps/chosen": -291.27972412109375, "logps/rejected": -251.38760375976562, "loss": 0.4687, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18871352076530457, "rewards/margins": 1.4281032085418701, "rewards/rejected": -1.616816759109497, "step": 690 }, { "epoch": 0.36, "learning_rate": 4.88716771849302e-07, "logits/chosen": -2.8433375358581543, "logits/rejected": -2.8692548274993896, "logps/chosen": -330.5716552734375, "logps/rejected": -274.46893310546875, "loss": 0.5239, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07384981215000153, "rewards/margins": 1.4511958360671997, "rewards/rejected": -1.5250459909439087, "step": 700 }, { "epoch": 0.37, "learning_rate": 4.87760566073819e-07, "logits/chosen": -2.7395122051239014, "logits/rejected": -2.781907558441162, "logps/chosen": -310.0351257324219, "logps/rejected": -240.80520629882812, "loss": 0.5632, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20480065047740936, "rewards/margins": 1.400748610496521, "rewards/rejected": -1.6055494546890259, "step": 710 }, { "epoch": 0.37, "learning_rate": 4.868043602983362e-07, "logits/chosen": -2.8281850814819336, "logits/rejected": -2.814466953277588, "logps/chosen": -301.6328125, "logps/rejected": -299.40936279296875, "loss": 0.5002, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5126577615737915, "rewards/margins": 1.7754802703857422, "rewards/rejected": -2.288137912750244, "step": 720 }, { "epoch": 0.38, "learning_rate": 4.858481545228533e-07, "logits/chosen": -2.8933727741241455, "logits/rejected": -2.8719959259033203, "logps/chosen": -322.64056396484375, "logps/rejected": -285.211181640625, "loss": 0.5076, "rewards/accuracies": 0.75, "rewards/chosen": -0.3348848521709442, "rewards/margins": 1.523197889328003, "rewards/rejected": -1.8580827713012695, "step": 730 }, { "epoch": 0.38, "learning_rate": 4.848919487473704e-07, "logits/chosen": -2.811448574066162, "logits/rejected": -2.7747862339019775, "logps/chosen": -302.3232727050781, "logps/rejected": -296.32391357421875, "loss": 0.6469, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.5591434240341187, "rewards/margins": 1.2536993026733398, "rewards/rejected": -1.8128427267074585, "step": 740 }, { "epoch": 0.39, "learning_rate": 4.839357429718875e-07, "logits/chosen": -2.8409669399261475, "logits/rejected": -2.7742369174957275, "logps/chosen": -273.21160888671875, "logps/rejected": -243.36630249023438, "loss": 0.5355, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4651729464530945, "rewards/margins": 1.2234070301055908, "rewards/rejected": -1.6885799169540405, "step": 750 }, { "epoch": 0.39, "learning_rate": 4.829795371964047e-07, "logits/chosen": -2.8159842491149902, "logits/rejected": -2.8242149353027344, "logps/chosen": -305.82257080078125, "logps/rejected": -267.11590576171875, "loss": 0.5369, "rewards/accuracies": 0.8125, "rewards/chosen": -0.4173976480960846, "rewards/margins": 1.344733476638794, "rewards/rejected": -1.7621314525604248, "step": 760 }, { "epoch": 0.4, "learning_rate": 4.820233314209217e-07, "logits/chosen": -2.718552589416504, "logits/rejected": -2.695807456970215, "logps/chosen": -265.13531494140625, "logps/rejected": -233.3996124267578, "loss": 0.5837, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5660188794136047, "rewards/margins": 1.1836113929748535, "rewards/rejected": -1.7496302127838135, "step": 770 }, { "epoch": 0.4, "learning_rate": 4.810671256454389e-07, "logits/chosen": -2.7111926078796387, "logits/rejected": -2.733715772628784, "logps/chosen": -304.9040832519531, "logps/rejected": -286.5145568847656, "loss": 0.5114, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.6240701675415039, "rewards/margins": 1.450642466545105, "rewards/rejected": -2.0747127532958984, "step": 780 }, { "epoch": 0.41, "learning_rate": 4.80110919869956e-07, "logits/chosen": -2.7404356002807617, "logits/rejected": -2.7429983615875244, "logps/chosen": -301.89862060546875, "logps/rejected": -229.8195343017578, "loss": 0.5147, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.38781577348709106, "rewards/margins": 1.3693140745162964, "rewards/rejected": -1.7571296691894531, "step": 790 }, { "epoch": 0.41, "learning_rate": 4.791547140944731e-07, "logits/chosen": -2.643702507019043, "logits/rejected": -2.6573736667633057, "logps/chosen": -227.3995819091797, "logps/rejected": -227.03079223632812, "loss": 0.5413, "rewards/accuracies": 0.8125, "rewards/chosen": -0.20422391593456268, "rewards/margins": 1.3888452053070068, "rewards/rejected": -1.593069314956665, "step": 800 }, { "epoch": 0.42, "learning_rate": 4.781985083189902e-07, "logits/chosen": -2.707505702972412, "logits/rejected": -2.6472725868225098, "logps/chosen": -254.8813018798828, "logps/rejected": -265.68927001953125, "loss": 0.6213, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.28768256306648254, "rewards/margins": 1.237850546836853, "rewards/rejected": -1.5255329608917236, "step": 810 }, { "epoch": 0.42, "learning_rate": 4.772423025435074e-07, "logits/chosen": -2.695797920227051, "logits/rejected": -2.73037052154541, "logps/chosen": -283.4263916015625, "logps/rejected": -274.9232482910156, "loss": 0.6503, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.31081074476242065, "rewards/margins": 0.8685556650161743, "rewards/rejected": -1.1793664693832397, "step": 820 }, { "epoch": 0.43, "learning_rate": 4.762860967680244e-07, "logits/chosen": -2.790220022201538, "logits/rejected": -2.7574965953826904, "logps/chosen": -240.9722137451172, "logps/rejected": -196.33604431152344, "loss": 0.5816, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.1742028295993805, "rewards/margins": 1.068560004234314, "rewards/rejected": -1.2427630424499512, "step": 830 }, { "epoch": 0.43, "learning_rate": 4.7532989099254154e-07, "logits/chosen": -2.7602648735046387, "logits/rejected": -2.7110064029693604, "logps/chosen": -262.978271484375, "logps/rejected": -234.6663055419922, "loss": 0.5501, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.31769394874572754, "rewards/margins": 1.0034860372543335, "rewards/rejected": -1.321179986000061, "step": 840 }, { "epoch": 0.44, "learning_rate": 4.7437368521705866e-07, "logits/chosen": -2.8047854900360107, "logits/rejected": -2.840674638748169, "logps/chosen": -252.6496124267578, "logps/rejected": -260.48602294921875, "loss": 0.6849, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21668288111686707, "rewards/margins": 1.5027235746383667, "rewards/rejected": -1.7194064855575562, "step": 850 }, { "epoch": 0.44, "learning_rate": 4.7341747944157577e-07, "logits/chosen": -2.8253366947174072, "logits/rejected": -2.776054859161377, "logps/chosen": -274.93511962890625, "logps/rejected": -252.29086303710938, "loss": 0.5782, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4284784197807312, "rewards/margins": 0.9962869882583618, "rewards/rejected": -1.4247655868530273, "step": 860 }, { "epoch": 0.45, "learning_rate": 4.724612736660929e-07, "logits/chosen": -2.7298855781555176, "logits/rejected": -2.7826218605041504, "logps/chosen": -292.0563049316406, "logps/rejected": -245.8948211669922, "loss": 0.7585, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.1329361200332642, "rewards/margins": 0.6591729521751404, "rewards/rejected": -1.7921088933944702, "step": 870 }, { "epoch": 0.45, "learning_rate": 4.7150506789061006e-07, "logits/chosen": -2.800131320953369, "logits/rejected": -2.76381254196167, "logps/chosen": -292.432373046875, "logps/rejected": -294.9343566894531, "loss": 0.5039, "rewards/accuracies": 0.8125, "rewards/chosen": -0.4364054203033447, "rewards/margins": 1.5577386617660522, "rewards/rejected": -1.9941442012786865, "step": 880 }, { "epoch": 0.46, "learning_rate": 4.7054886211512717e-07, "logits/chosen": -2.769394874572754, "logits/rejected": -2.8211991786956787, "logps/chosen": -284.194091796875, "logps/rejected": -250.88818359375, "loss": 0.5477, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2785911560058594, "rewards/margins": 1.1287821531295776, "rewards/rejected": -1.407373309135437, "step": 890 }, { "epoch": 0.46, "learning_rate": 4.695926563396443e-07, "logits/chosen": -2.868389844894409, "logits/rejected": -2.934311628341675, "logps/chosen": -275.0090026855469, "logps/rejected": -234.48464965820312, "loss": 0.5243, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4162035584449768, "rewards/margins": 0.9356048703193665, "rewards/rejected": -1.3518084287643433, "step": 900 }, { "epoch": 0.47, "learning_rate": 4.686364505641614e-07, "logits/chosen": -2.837700128555298, "logits/rejected": -2.8741488456726074, "logps/chosen": -275.07470703125, "logps/rejected": -236.64950561523438, "loss": 0.48, "rewards/accuracies": 0.8125, "rewards/chosen": -0.2955784201622009, "rewards/margins": 1.4538462162017822, "rewards/rejected": -1.7494245767593384, "step": 910 }, { "epoch": 0.47, "learning_rate": 4.676802447886785e-07, "logits/chosen": -2.7928593158721924, "logits/rejected": -2.8100712299346924, "logps/chosen": -269.1537170410156, "logps/rejected": -243.173828125, "loss": 0.6521, "rewards/accuracies": 0.75, "rewards/chosen": -0.4626309871673584, "rewards/margins": 1.3189350366592407, "rewards/rejected": -1.7815659046173096, "step": 920 }, { "epoch": 0.48, "learning_rate": 4.6672403901319564e-07, "logits/chosen": -2.819791793823242, "logits/rejected": -2.783853054046631, "logps/chosen": -258.08160400390625, "logps/rejected": -241.07174682617188, "loss": 0.4852, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03211576119065285, "rewards/margins": 1.6092383861541748, "rewards/rejected": -1.641353964805603, "step": 930 }, { "epoch": 0.49, "learning_rate": 4.6576783323771275e-07, "logits/chosen": -2.71333384513855, "logits/rejected": -2.7661778926849365, "logps/chosen": -232.42251586914062, "logps/rejected": -220.47915649414062, "loss": 0.5073, "rewards/accuracies": 0.8125, "rewards/chosen": -0.39604687690734863, "rewards/margins": 1.278062105178833, "rewards/rejected": -1.6741091012954712, "step": 940 }, { "epoch": 0.49, "learning_rate": 4.6481162746222987e-07, "logits/chosen": -2.7065939903259277, "logits/rejected": -2.6961522102355957, "logps/chosen": -305.9462890625, "logps/rejected": -261.17340087890625, "loss": 0.7714, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.5392950773239136, "rewards/margins": 0.5164933204650879, "rewards/rejected": -2.055788278579712, "step": 950 }, { "epoch": 0.5, "learning_rate": 4.63855421686747e-07, "logits/chosen": -2.7435028553009033, "logits/rejected": -2.7790913581848145, "logps/chosen": -296.8639221191406, "logps/rejected": -247.61825561523438, "loss": 0.5541, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.42261379957199097, "rewards/margins": 1.4009877443313599, "rewards/rejected": -1.823601484298706, "step": 960 }, { "epoch": 0.5, "learning_rate": 4.628992159112641e-07, "logits/chosen": -2.782947063446045, "logits/rejected": -2.694589138031006, "logps/chosen": -264.4646301269531, "logps/rejected": -268.305908203125, "loss": 0.5559, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.18068067729473114, "rewards/margins": 1.476174235343933, "rewards/rejected": -1.6568549871444702, "step": 970 }, { "epoch": 0.51, "learning_rate": 4.6194301013578116e-07, "logits/chosen": -2.82590913772583, "logits/rejected": -2.785083293914795, "logps/chosen": -326.63250732421875, "logps/rejected": -253.30032348632812, "loss": 0.5292, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.36352962255477905, "rewards/margins": 1.4459807872772217, "rewards/rejected": -1.8095104694366455, "step": 980 }, { "epoch": 0.51, "learning_rate": 4.609868043602983e-07, "logits/chosen": -2.802238941192627, "logits/rejected": -2.833404302597046, "logps/chosen": -255.2863006591797, "logps/rejected": -233.3296661376953, "loss": 0.4816, "rewards/accuracies": 0.75, "rewards/chosen": -0.32216745615005493, "rewards/margins": 1.170350432395935, "rewards/rejected": -1.4925178289413452, "step": 990 }, { "epoch": 0.52, "learning_rate": 4.600305985848154e-07, "logits/chosen": -2.7925350666046143, "logits/rejected": -2.776784658432007, "logps/chosen": -248.7510986328125, "logps/rejected": -246.3361053466797, "loss": 0.4907, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.3822530508041382, "rewards/margins": 0.8374401926994324, "rewards/rejected": -1.2196933031082153, "step": 1000 }, { "epoch": 0.52, "eval_logits/chosen": -2.844231605529785, "eval_logits/rejected": -2.8401119709014893, "eval_logps/chosen": -279.0693054199219, "eval_logps/rejected": -250.51393127441406, "eval_loss": 0.5233809947967529, "eval_rewards/accuracies": 0.7620000243186951, "eval_rewards/chosen": -0.3345881402492523, "eval_rewards/margins": 1.4806790351867676, "eval_rewards/rejected": -1.8152673244476318, "eval_runtime": 452.4269, "eval_samples_per_second": 4.421, "eval_steps_per_second": 0.276, "step": 1000 }, { "epoch": 0.52, "learning_rate": 4.590743928093325e-07, "logits/chosen": -2.675524950027466, "logits/rejected": -2.6706223487854004, "logps/chosen": -316.2507629394531, "logps/rejected": -263.38165283203125, "loss": 0.5685, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6011537313461304, "rewards/margins": 1.0471036434173584, "rewards/rejected": -1.6482574939727783, "step": 1010 }, { "epoch": 0.53, "learning_rate": 4.581181870338497e-07, "logits/chosen": -2.683260440826416, "logits/rejected": -2.792513370513916, "logps/chosen": -333.0619201660156, "logps/rejected": -292.0895080566406, "loss": 0.5133, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4557833671569824, "rewards/margins": 1.5725176334381104, "rewards/rejected": -2.0283007621765137, "step": 1020 }, { "epoch": 0.53, "learning_rate": 4.571619812583668e-07, "logits/chosen": -2.7991647720336914, "logits/rejected": -2.7419731616973877, "logps/chosen": -270.207763671875, "logps/rejected": -275.93701171875, "loss": 0.4559, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.46183767914772034, "rewards/margins": 1.4133819341659546, "rewards/rejected": -1.8752195835113525, "step": 1030 }, { "epoch": 0.54, "learning_rate": 4.562057754828839e-07, "logits/chosen": -2.690382480621338, "logits/rejected": -2.8083395957946777, "logps/chosen": -287.6054992675781, "logps/rejected": -252.50442504882812, "loss": 0.5438, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.5533614158630371, "rewards/margins": 1.6856781244277954, "rewards/rejected": -2.239039897918701, "step": 1040 }, { "epoch": 0.54, "learning_rate": 4.55249569707401e-07, "logits/chosen": -2.7608683109283447, "logits/rejected": -2.758338212966919, "logps/chosen": -236.76528930664062, "logps/rejected": -256.64666748046875, "loss": 0.4936, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7707513570785522, "rewards/margins": 1.3510288000106812, "rewards/rejected": -2.1217801570892334, "step": 1050 }, { "epoch": 0.55, "learning_rate": 4.5429336393191814e-07, "logits/chosen": -2.6740434169769287, "logits/rejected": -2.7331643104553223, "logps/chosen": -257.99456787109375, "logps/rejected": -221.4962158203125, "loss": 0.6079, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6206737756729126, "rewards/margins": 0.7699206471443176, "rewards/rejected": -1.3905946016311646, "step": 1060 }, { "epoch": 0.55, "learning_rate": 4.5333715815643525e-07, "logits/chosen": -2.7877304553985596, "logits/rejected": -2.815929889678955, "logps/chosen": -302.9522399902344, "logps/rejected": -267.2388916015625, "loss": 0.5461, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6460703611373901, "rewards/margins": 1.2526905536651611, "rewards/rejected": -1.8987607955932617, "step": 1070 }, { "epoch": 0.56, "learning_rate": 4.5238095238095237e-07, "logits/chosen": -2.796346426010132, "logits/rejected": -2.7912135124206543, "logps/chosen": -281.9361267089844, "logps/rejected": -246.5717315673828, "loss": 0.5535, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7052322030067444, "rewards/margins": 1.1822354793548584, "rewards/rejected": -1.8874677419662476, "step": 1080 }, { "epoch": 0.56, "learning_rate": 4.514247466054695e-07, "logits/chosen": -2.7953383922576904, "logits/rejected": -2.739332914352417, "logps/chosen": -229.3057403564453, "logps/rejected": -223.58279418945312, "loss": 0.5363, "rewards/accuracies": 0.75, "rewards/chosen": -0.6706255078315735, "rewards/margins": 1.3192580938339233, "rewards/rejected": -1.9898836612701416, "step": 1090 }, { "epoch": 0.57, "learning_rate": 4.504685408299866e-07, "logits/chosen": -2.7090344429016113, "logits/rejected": -2.6705334186553955, "logps/chosen": -302.71417236328125, "logps/rejected": -292.3421325683594, "loss": 0.5132, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.564530074596405, "rewards/margins": 1.3910361528396606, "rewards/rejected": -1.955566167831421, "step": 1100 }, { "epoch": 0.57, "learning_rate": 4.495123350545037e-07, "logits/chosen": -2.767871379852295, "logits/rejected": -2.7415690422058105, "logps/chosen": -302.222900390625, "logps/rejected": -287.3324279785156, "loss": 0.5608, "rewards/accuracies": 0.75, "rewards/chosen": -0.4624261260032654, "rewards/margins": 1.2597681283950806, "rewards/rejected": -1.7221943140029907, "step": 1110 }, { "epoch": 0.58, "learning_rate": 4.4855612927902083e-07, "logits/chosen": -2.6437506675720215, "logits/rejected": -2.641035318374634, "logps/chosen": -303.7630310058594, "logps/rejected": -254.9109649658203, "loss": 1.1531, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7806851863861084, "rewards/margins": 1.3015507459640503, "rewards/rejected": -2.0822360515594482, "step": 1120 }, { "epoch": 0.58, "learning_rate": 4.4759992350353795e-07, "logits/chosen": -2.648594379425049, "logits/rejected": -2.6584677696228027, "logps/chosen": -293.0738830566406, "logps/rejected": -233.2223358154297, "loss": 0.4252, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.7711928486824036, "rewards/margins": 1.5724222660064697, "rewards/rejected": -2.3436150550842285, "step": 1130 }, { "epoch": 0.59, "learning_rate": 4.46643717728055e-07, "logits/chosen": -2.597867727279663, "logits/rejected": -2.697968006134033, "logps/chosen": -251.0467529296875, "logps/rejected": -260.2559509277344, "loss": 0.6018, "rewards/accuracies": 0.75, "rewards/chosen": -0.7678476572036743, "rewards/margins": 1.4263780117034912, "rewards/rejected": -2.194225788116455, "step": 1140 }, { "epoch": 0.59, "learning_rate": 4.4568751195257213e-07, "logits/chosen": -2.6516098976135254, "logits/rejected": -2.5899970531463623, "logps/chosen": -331.1034240722656, "logps/rejected": -261.8896484375, "loss": 0.5869, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.24301430583000183, "rewards/margins": 1.839286208152771, "rewards/rejected": -2.0823006629943848, "step": 1150 }, { "epoch": 0.6, "learning_rate": 4.447313061770893e-07, "logits/chosen": -2.6758570671081543, "logits/rejected": -2.602381706237793, "logps/chosen": -263.66851806640625, "logps/rejected": -281.92852783203125, "loss": 0.5807, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.30647602677345276, "rewards/margins": 1.638983964920044, "rewards/rejected": -1.9454599618911743, "step": 1160 }, { "epoch": 0.6, "learning_rate": 4.437751004016064e-07, "logits/chosen": -2.6164095401763916, "logits/rejected": -2.6072592735290527, "logps/chosen": -236.7494659423828, "logps/rejected": -247.128173828125, "loss": 0.4963, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6536493301391602, "rewards/margins": 1.0608946084976196, "rewards/rejected": -1.7145439386367798, "step": 1170 }, { "epoch": 0.61, "learning_rate": 4.4281889462612353e-07, "logits/chosen": -2.7885098457336426, "logits/rejected": -2.735015869140625, "logps/chosen": -273.19390869140625, "logps/rejected": -229.81619262695312, "loss": 0.5465, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.5660006403923035, "rewards/margins": 1.4190986156463623, "rewards/rejected": -1.985099196434021, "step": 1180 }, { "epoch": 0.61, "learning_rate": 4.4186268885064064e-07, "logits/chosen": -2.7694408893585205, "logits/rejected": -2.7590034008026123, "logps/chosen": -299.28875732421875, "logps/rejected": -251.94491577148438, "loss": 0.531, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6542151570320129, "rewards/margins": 1.0486948490142822, "rewards/rejected": -1.70291006565094, "step": 1190 }, { "epoch": 0.62, "learning_rate": 4.4090648307515776e-07, "logits/chosen": -2.7691597938537598, "logits/rejected": -2.7554731369018555, "logps/chosen": -229.202392578125, "logps/rejected": -191.56576538085938, "loss": 0.5307, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.9604787826538086, "rewards/margins": 0.9451109766960144, "rewards/rejected": -1.9055898189544678, "step": 1200 }, { "epoch": 0.62, "learning_rate": 4.399502772996749e-07, "logits/chosen": -2.7866642475128174, "logits/rejected": -2.7626142501831055, "logps/chosen": -278.181396484375, "logps/rejected": -256.399658203125, "loss": 0.4674, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6451205611228943, "rewards/margins": 1.4265587329864502, "rewards/rejected": -2.0716793537139893, "step": 1210 }, { "epoch": 0.63, "learning_rate": 4.38994071524192e-07, "logits/chosen": -2.765838623046875, "logits/rejected": -2.835886001586914, "logps/chosen": -258.55694580078125, "logps/rejected": -223.63119506835938, "loss": 0.572, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7563117742538452, "rewards/margins": 1.1331018209457397, "rewards/rejected": -1.889413595199585, "step": 1220 }, { "epoch": 0.64, "learning_rate": 4.380378657487091e-07, "logits/chosen": -2.9007599353790283, "logits/rejected": -2.8781092166900635, "logps/chosen": -322.48297119140625, "logps/rejected": -309.03057861328125, "loss": 0.6139, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5290047526359558, "rewards/margins": 1.6923503875732422, "rewards/rejected": -2.2213549613952637, "step": 1230 }, { "epoch": 0.64, "learning_rate": 4.370816599732262e-07, "logits/chosen": -2.882289171218872, "logits/rejected": -2.8788981437683105, "logps/chosen": -311.59112548828125, "logps/rejected": -237.2730712890625, "loss": 0.5165, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.0326735973358154, "rewards/margins": 1.2838331460952759, "rewards/rejected": -2.316506862640381, "step": 1240 }, { "epoch": 0.65, "learning_rate": 4.3612545419774334e-07, "logits/chosen": -2.735339641571045, "logits/rejected": -2.765554189682007, "logps/chosen": -240.76547241210938, "logps/rejected": -268.36077880859375, "loss": 0.5601, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.080916166305542, "rewards/margins": 0.9319782257080078, "rewards/rejected": -2.01289439201355, "step": 1250 }, { "epoch": 0.65, "learning_rate": 4.3516924842226045e-07, "logits/chosen": -2.712817907333374, "logits/rejected": -2.779609203338623, "logps/chosen": -303.5237731933594, "logps/rejected": -254.60684204101562, "loss": 0.6256, "rewards/accuracies": 0.75, "rewards/chosen": -1.225891351699829, "rewards/margins": 1.0712993144989014, "rewards/rejected": -2.2971906661987305, "step": 1260 }, { "epoch": 0.66, "learning_rate": 4.3421304264677757e-07, "logits/chosen": -2.8182456493377686, "logits/rejected": -2.7474656105041504, "logps/chosen": -265.8131103515625, "logps/rejected": -225.51089477539062, "loss": 0.5053, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.8482934236526489, "rewards/margins": 1.4276056289672852, "rewards/rejected": -2.2758994102478027, "step": 1270 }, { "epoch": 0.66, "learning_rate": 4.332568368712947e-07, "logits/chosen": -2.829192876815796, "logits/rejected": -2.8889195919036865, "logps/chosen": -276.78814697265625, "logps/rejected": -252.64096069335938, "loss": 0.5599, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9601444005966187, "rewards/margins": 1.1756095886230469, "rewards/rejected": -2.135754108428955, "step": 1280 }, { "epoch": 0.67, "learning_rate": 4.323006310958118e-07, "logits/chosen": -2.8162477016448975, "logits/rejected": -2.890505313873291, "logps/chosen": -323.9197692871094, "logps/rejected": -273.29632568359375, "loss": 0.5012, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.6982527375221252, "rewards/margins": 1.4712746143341064, "rewards/rejected": -2.169527292251587, "step": 1290 }, { "epoch": 0.67, "learning_rate": 4.313444253203289e-07, "logits/chosen": -2.783569812774658, "logits/rejected": -2.7339978218078613, "logps/chosen": -263.76531982421875, "logps/rejected": -239.9626922607422, "loss": 0.5372, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.8767280578613281, "rewards/margins": 1.3299367427825928, "rewards/rejected": -2.2066650390625, "step": 1300 }, { "epoch": 0.68, "learning_rate": 4.3038821954484603e-07, "logits/chosen": -2.7811901569366455, "logits/rejected": -2.737088918685913, "logps/chosen": -286.2353210449219, "logps/rejected": -256.3047180175781, "loss": 0.4805, "rewards/accuracies": 0.75, "rewards/chosen": -1.0940454006195068, "rewards/margins": 1.132187843322754, "rewards/rejected": -2.22623348236084, "step": 1310 }, { "epoch": 0.68, "learning_rate": 4.2943201376936315e-07, "logits/chosen": -2.726167917251587, "logits/rejected": -2.688605546951294, "logps/chosen": -282.53546142578125, "logps/rejected": -257.32373046875, "loss": 0.547, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.0240520238876343, "rewards/margins": 1.4254390001296997, "rewards/rejected": -2.449491024017334, "step": 1320 }, { "epoch": 0.69, "learning_rate": 4.2847580799388026e-07, "logits/chosen": -2.7360429763793945, "logits/rejected": -2.7077505588531494, "logps/chosen": -302.6694030761719, "logps/rejected": -286.1494140625, "loss": 0.5727, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.7704175114631653, "rewards/margins": 1.3586149215698242, "rewards/rejected": -2.1290321350097656, "step": 1330 }, { "epoch": 0.69, "learning_rate": 4.275196022183974e-07, "logits/chosen": -2.687389373779297, "logits/rejected": -2.7357983589172363, "logps/chosen": -301.34136962890625, "logps/rejected": -234.6636199951172, "loss": 0.5274, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.9257659912109375, "rewards/margins": 1.1349513530731201, "rewards/rejected": -2.0607173442840576, "step": 1340 }, { "epoch": 0.7, "learning_rate": 4.265633964429145e-07, "logits/chosen": -2.770357847213745, "logits/rejected": -2.746288776397705, "logps/chosen": -268.09576416015625, "logps/rejected": -213.56039428710938, "loss": 0.5658, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.6878668665885925, "rewards/margins": 1.0466588735580444, "rewards/rejected": -1.7345256805419922, "step": 1350 }, { "epoch": 0.7, "learning_rate": 4.256071906674316e-07, "logits/chosen": -2.7798330783843994, "logits/rejected": -2.805274486541748, "logps/chosen": -317.8987731933594, "logps/rejected": -276.82562255859375, "loss": 0.627, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.550291895866394, "rewards/margins": 1.2872064113616943, "rewards/rejected": -1.8374983072280884, "step": 1360 }, { "epoch": 0.71, "learning_rate": 4.246509848919487e-07, "logits/chosen": -2.8107523918151855, "logits/rejected": -2.842200756072998, "logps/chosen": -262.02874755859375, "logps/rejected": -265.257080078125, "loss": 0.5532, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.4975528120994568, "rewards/margins": 1.5920393466949463, "rewards/rejected": -2.089592218399048, "step": 1370 }, { "epoch": 0.71, "learning_rate": 4.2369477911646584e-07, "logits/chosen": -2.8077638149261475, "logits/rejected": -2.801124334335327, "logps/chosen": -273.29962158203125, "logps/rejected": -232.51559448242188, "loss": 0.5683, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7220560312271118, "rewards/margins": 1.1169109344482422, "rewards/rejected": -1.8389670848846436, "step": 1380 }, { "epoch": 0.72, "learning_rate": 4.2273857334098296e-07, "logits/chosen": -2.7139172554016113, "logits/rejected": -2.7589526176452637, "logps/chosen": -287.19781494140625, "logps/rejected": -242.8853302001953, "loss": 0.5754, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.5711519122123718, "rewards/margins": 1.1763086318969727, "rewards/rejected": -1.7474607229232788, "step": 1390 }, { "epoch": 0.72, "learning_rate": 4.2178236756550007e-07, "logits/chosen": -2.696545124053955, "logits/rejected": -2.714484214782715, "logps/chosen": -297.3968505859375, "logps/rejected": -245.6906280517578, "loss": 0.6399, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.8875021934509277, "rewards/margins": 1.0341614484786987, "rewards/rejected": -1.9216636419296265, "step": 1400 }, { "epoch": 0.73, "learning_rate": 4.208261617900172e-07, "logits/chosen": -2.7567496299743652, "logits/rejected": -2.7744107246398926, "logps/chosen": -283.526611328125, "logps/rejected": -214.8311309814453, "loss": 0.4682, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.7966445684432983, "rewards/margins": 1.516661286354065, "rewards/rejected": -2.3133058547973633, "step": 1410 }, { "epoch": 0.73, "learning_rate": 4.198699560145343e-07, "logits/chosen": -2.694303512573242, "logits/rejected": -2.5877633094787598, "logps/chosen": -268.08709716796875, "logps/rejected": -245.7215576171875, "loss": 0.6334, "rewards/accuracies": 0.75, "rewards/chosen": -0.9871991872787476, "rewards/margins": 1.256882905960083, "rewards/rejected": -2.24408221244812, "step": 1420 }, { "epoch": 0.74, "learning_rate": 4.189137502390514e-07, "logits/chosen": -2.728818416595459, "logits/rejected": -2.7532124519348145, "logps/chosen": -279.16851806640625, "logps/rejected": -277.90826416015625, "loss": 0.6436, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7359957098960876, "rewards/margins": 1.080309271812439, "rewards/rejected": -1.8163049221038818, "step": 1430 }, { "epoch": 0.74, "learning_rate": 4.179575444635686e-07, "logits/chosen": -2.718947649002075, "logits/rejected": -2.7442736625671387, "logps/chosen": -337.0057373046875, "logps/rejected": -276.1755676269531, "loss": 0.5953, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6292416453361511, "rewards/margins": 1.0549640655517578, "rewards/rejected": -1.6842056512832642, "step": 1440 }, { "epoch": 0.75, "learning_rate": 4.170013386880857e-07, "logits/chosen": -2.7211525440216064, "logits/rejected": -2.678626775741577, "logps/chosen": -274.9634704589844, "logps/rejected": -284.86328125, "loss": 0.5041, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.6449888944625854, "rewards/margins": 1.4034655094146729, "rewards/rejected": -2.0484542846679688, "step": 1450 }, { "epoch": 0.75, "learning_rate": 4.1604513291260277e-07, "logits/chosen": -2.6686198711395264, "logits/rejected": -2.7258832454681396, "logps/chosen": -265.8595886230469, "logps/rejected": -262.75701904296875, "loss": 0.5408, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.6771360635757446, "rewards/margins": 1.6670234203338623, "rewards/rejected": -2.3441596031188965, "step": 1460 }, { "epoch": 0.76, "learning_rate": 4.150889271371199e-07, "logits/chosen": -2.618217945098877, "logits/rejected": -2.6638619899749756, "logps/chosen": -291.3425598144531, "logps/rejected": -239.32754516601562, "loss": 0.5876, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.7784534096717834, "rewards/margins": 1.4896891117095947, "rewards/rejected": -2.2681422233581543, "step": 1470 }, { "epoch": 0.76, "learning_rate": 4.14132721361637e-07, "logits/chosen": -2.626051187515259, "logits/rejected": -2.742926836013794, "logps/chosen": -277.51007080078125, "logps/rejected": -207.0497283935547, "loss": 0.4242, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.9323328733444214, "rewards/margins": 1.7172333002090454, "rewards/rejected": -2.649566173553467, "step": 1480 }, { "epoch": 0.77, "learning_rate": 4.131765155861541e-07, "logits/chosen": -2.5984182357788086, "logits/rejected": -2.634274482727051, "logps/chosen": -243.6094970703125, "logps/rejected": -216.2739715576172, "loss": 0.5322, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.8016673922538757, "rewards/margins": 1.508610486984253, "rewards/rejected": -2.3102779388427734, "step": 1490 }, { "epoch": 0.77, "learning_rate": 4.1222030981067123e-07, "logits/chosen": -2.6750636100769043, "logits/rejected": -2.736687183380127, "logps/chosen": -305.0487976074219, "logps/rejected": -273.23553466796875, "loss": 0.4388, "rewards/accuracies": 0.75, "rewards/chosen": -0.9884425401687622, "rewards/margins": 1.6452935934066772, "rewards/rejected": -2.6337361335754395, "step": 1500 }, { "epoch": 0.77, "eval_logits/chosen": -2.744432210922241, "eval_logits/rejected": -2.741956949234009, "eval_logps/chosen": -283.57977294921875, "eval_logps/rejected": -255.0812225341797, "eval_loss": 0.5201703310012817, "eval_rewards/accuracies": 0.7919999957084656, "eval_rewards/chosen": -0.7856327891349792, "eval_rewards/margins": 1.4863603115081787, "eval_rewards/rejected": -2.271993398666382, "eval_runtime": 454.3263, "eval_samples_per_second": 4.402, "eval_steps_per_second": 0.275, "step": 1500 }, { "epoch": 0.78, "learning_rate": 4.1126410403518835e-07, "logits/chosen": -2.6532881259918213, "logits/rejected": -2.7087454795837402, "logps/chosen": -256.3421936035156, "logps/rejected": -271.71173095703125, "loss": 0.5294, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.9765220880508423, "rewards/margins": 1.416123628616333, "rewards/rejected": -2.392645835876465, "step": 1510 }, { "epoch": 0.78, "learning_rate": 4.1030789825970546e-07, "logits/chosen": -2.638579845428467, "logits/rejected": -2.668213367462158, "logps/chosen": -310.85296630859375, "logps/rejected": -279.1716003417969, "loss": 0.5471, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.8188529014587402, "rewards/margins": 1.2735222578048706, "rewards/rejected": -2.092374801635742, "step": 1520 }, { "epoch": 0.79, "learning_rate": 4.093516924842226e-07, "logits/chosen": -2.7053446769714355, "logits/rejected": -2.705913543701172, "logps/chosen": -292.74359130859375, "logps/rejected": -268.7305908203125, "loss": 0.4502, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.3197025656700134, "rewards/margins": 1.5472514629364014, "rewards/rejected": -1.8669540882110596, "step": 1530 }, { "epoch": 0.8, "learning_rate": 4.083954867087397e-07, "logits/chosen": -2.820955276489258, "logits/rejected": -2.781850814819336, "logps/chosen": -263.65234375, "logps/rejected": -251.3944854736328, "loss": 0.5528, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6488135457038879, "rewards/margins": 1.2752244472503662, "rewards/rejected": -1.9240379333496094, "step": 1540 }, { "epoch": 0.8, "learning_rate": 4.074392809332568e-07, "logits/chosen": -2.742565155029297, "logits/rejected": -2.7900197505950928, "logps/chosen": -328.9388732910156, "logps/rejected": -272.40667724609375, "loss": 0.5103, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5513970255851746, "rewards/margins": 2.098090410232544, "rewards/rejected": -2.6494877338409424, "step": 1550 }, { "epoch": 0.81, "learning_rate": 4.064830751577739e-07, "logits/chosen": -2.8128983974456787, "logits/rejected": -2.783550977706909, "logps/chosen": -264.9004211425781, "logps/rejected": -248.5589141845703, "loss": 0.5191, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4322876036167145, "rewards/margins": 1.4417130947113037, "rewards/rejected": -1.8740005493164062, "step": 1560 }, { "epoch": 0.81, "learning_rate": 4.0552686938229104e-07, "logits/chosen": -2.7836945056915283, "logits/rejected": -2.79536771774292, "logps/chosen": -264.26385498046875, "logps/rejected": -233.51791381835938, "loss": 0.4382, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4552520215511322, "rewards/margins": 1.2747576236724854, "rewards/rejected": -1.73000967502594, "step": 1570 }, { "epoch": 0.82, "learning_rate": 4.045706636068082e-07, "logits/chosen": -2.7296574115753174, "logits/rejected": -2.784824848175049, "logps/chosen": -277.8669738769531, "logps/rejected": -247.8280792236328, "loss": 0.4745, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.6441946625709534, "rewards/margins": 1.571209192276001, "rewards/rejected": -2.2154037952423096, "step": 1580 }, { "epoch": 0.82, "learning_rate": 4.036144578313253e-07, "logits/chosen": -2.669766664505005, "logits/rejected": -2.749067544937134, "logps/chosen": -280.0926208496094, "logps/rejected": -257.1477966308594, "loss": 0.5338, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6168959736824036, "rewards/margins": 1.3998132944107056, "rewards/rejected": -2.016709327697754, "step": 1590 }, { "epoch": 0.83, "learning_rate": 4.0265825205584244e-07, "logits/chosen": -2.7413103580474854, "logits/rejected": -2.712337017059326, "logps/chosen": -296.93768310546875, "logps/rejected": -267.0952453613281, "loss": 0.5875, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6115021705627441, "rewards/margins": 1.2376142740249634, "rewards/rejected": -1.849116325378418, "step": 1600 }, { "epoch": 0.83, "learning_rate": 4.0170204628035956e-07, "logits/chosen": -2.6845192909240723, "logits/rejected": -2.729642152786255, "logps/chosen": -226.81564331054688, "logps/rejected": -218.29470825195312, "loss": 0.4993, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.4952973425388336, "rewards/margins": 1.4554481506347656, "rewards/rejected": -1.9507455825805664, "step": 1610 }, { "epoch": 0.84, "learning_rate": 4.007458405048766e-07, "logits/chosen": -2.680030345916748, "logits/rejected": -2.713993549346924, "logps/chosen": -313.85894775390625, "logps/rejected": -285.9293212890625, "loss": 0.4574, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6897695660591125, "rewards/margins": 1.6455440521240234, "rewards/rejected": -2.3353137969970703, "step": 1620 }, { "epoch": 0.84, "learning_rate": 3.9978963472939373e-07, "logits/chosen": -2.647779941558838, "logits/rejected": -2.676213026046753, "logps/chosen": -284.97760009765625, "logps/rejected": -246.68838500976562, "loss": 0.4985, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6747108697891235, "rewards/margins": 1.6222903728485107, "rewards/rejected": -2.297001361846924, "step": 1630 }, { "epoch": 0.85, "learning_rate": 3.9883342895391085e-07, "logits/chosen": -2.7175776958465576, "logits/rejected": -2.674654722213745, "logps/chosen": -325.5532531738281, "logps/rejected": -255.57589721679688, "loss": 0.4878, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6802691221237183, "rewards/margins": 1.6955766677856445, "rewards/rejected": -2.3758456707000732, "step": 1640 }, { "epoch": 0.85, "learning_rate": 3.9787722317842796e-07, "logits/chosen": -2.7575249671936035, "logits/rejected": -2.7429704666137695, "logps/chosen": -295.3287353515625, "logps/rejected": -210.6437530517578, "loss": 0.5416, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.2862042784690857, "rewards/margins": 1.6433782577514648, "rewards/rejected": -1.9295822381973267, "step": 1650 }, { "epoch": 0.86, "learning_rate": 3.969210174029451e-07, "logits/chosen": -2.683577060699463, "logits/rejected": -2.5588877201080322, "logps/chosen": -284.94488525390625, "logps/rejected": -277.26715087890625, "loss": 0.5569, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8422481417655945, "rewards/margins": 1.1587899923324585, "rewards/rejected": -2.001038074493408, "step": 1660 }, { "epoch": 0.86, "learning_rate": 3.959648116274622e-07, "logits/chosen": -2.6856627464294434, "logits/rejected": -2.720485210418701, "logps/chosen": -278.14727783203125, "logps/rejected": -235.4019012451172, "loss": 0.5908, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8898976445198059, "rewards/margins": 0.9991616010665894, "rewards/rejected": -1.8890594244003296, "step": 1670 }, { "epoch": 0.87, "learning_rate": 3.950086058519793e-07, "logits/chosen": -2.7345478534698486, "logits/rejected": -2.7119522094726562, "logps/chosen": -251.06784057617188, "logps/rejected": -237.36767578125, "loss": 0.5711, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.6733026504516602, "rewards/margins": 1.1799070835113525, "rewards/rejected": -1.8532098531723022, "step": 1680 }, { "epoch": 0.87, "learning_rate": 3.9405240007649643e-07, "logits/chosen": -2.7996246814727783, "logits/rejected": -2.8810853958129883, "logps/chosen": -263.982421875, "logps/rejected": -253.3212127685547, "loss": 0.4917, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.6147769093513489, "rewards/margins": 1.3312063217163086, "rewards/rejected": -1.9459832906723022, "step": 1690 }, { "epoch": 0.88, "learning_rate": 3.9309619430101354e-07, "logits/chosen": -2.711601734161377, "logits/rejected": -2.8019349575042725, "logps/chosen": -267.4760437011719, "logps/rejected": -244.1147918701172, "loss": 0.5695, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.6157582402229309, "rewards/margins": 1.292510747909546, "rewards/rejected": -1.9082691669464111, "step": 1700 }, { "epoch": 0.88, "learning_rate": 3.9213998852553066e-07, "logits/chosen": -2.7815704345703125, "logits/rejected": -2.810908555984497, "logps/chosen": -328.38531494140625, "logps/rejected": -266.3671569824219, "loss": 0.5704, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7039732933044434, "rewards/margins": 1.1600509881973267, "rewards/rejected": -1.8640244007110596, "step": 1710 }, { "epoch": 0.89, "learning_rate": 3.9118378275004783e-07, "logits/chosen": -2.833494186401367, "logits/rejected": -2.773686170578003, "logps/chosen": -274.27349853515625, "logps/rejected": -314.40447998046875, "loss": 0.5537, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.7931226491928101, "rewards/margins": 0.9070862531661987, "rewards/rejected": -1.7002089023590088, "step": 1720 }, { "epoch": 0.89, "learning_rate": 3.9022757697456494e-07, "logits/chosen": -2.6811347007751465, "logits/rejected": -2.7765626907348633, "logps/chosen": -344.49310302734375, "logps/rejected": -281.51678466796875, "loss": 0.5263, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6116336584091187, "rewards/margins": 1.0795291662216187, "rewards/rejected": -1.6911628246307373, "step": 1730 }, { "epoch": 0.9, "learning_rate": 3.8927137119908206e-07, "logits/chosen": -2.7077999114990234, "logits/rejected": -2.7165045738220215, "logps/chosen": -303.66107177734375, "logps/rejected": -229.548095703125, "loss": 0.5082, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.65656977891922, "rewards/margins": 1.3464813232421875, "rewards/rejected": -2.003051280975342, "step": 1740 }, { "epoch": 0.9, "learning_rate": 3.883151654235992e-07, "logits/chosen": -2.7294795513153076, "logits/rejected": -2.7539401054382324, "logps/chosen": -296.2801818847656, "logps/rejected": -263.555908203125, "loss": 0.5555, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5686991214752197, "rewards/margins": 1.5480784177780151, "rewards/rejected": -2.1167776584625244, "step": 1750 }, { "epoch": 0.91, "learning_rate": 3.873589596481163e-07, "logits/chosen": -2.785902261734009, "logits/rejected": -2.778597593307495, "logps/chosen": -294.0135192871094, "logps/rejected": -264.5918884277344, "loss": 0.5686, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.79108726978302, "rewards/margins": 1.2999193668365479, "rewards/rejected": -2.0910067558288574, "step": 1760 }, { "epoch": 0.91, "learning_rate": 3.864027538726334e-07, "logits/chosen": -2.640261173248291, "logits/rejected": -2.7362558841705322, "logps/chosen": -277.26153564453125, "logps/rejected": -251.80453491210938, "loss": 0.5828, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.48934370279312134, "rewards/margins": 1.6264740228652954, "rewards/rejected": -2.1158177852630615, "step": 1770 }, { "epoch": 0.92, "learning_rate": 3.8544654809715047e-07, "logits/chosen": -2.7447402477264404, "logits/rejected": -2.735809803009033, "logps/chosen": -291.24432373046875, "logps/rejected": -259.63238525390625, "loss": 0.5448, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.8588013648986816, "rewards/margins": 1.3649778366088867, "rewards/rejected": -2.2237792015075684, "step": 1780 }, { "epoch": 0.92, "learning_rate": 3.844903423216676e-07, "logits/chosen": -2.689377784729004, "logits/rejected": -2.642862319946289, "logps/chosen": -270.98809814453125, "logps/rejected": -237.0521240234375, "loss": 0.5087, "rewards/accuracies": 0.75, "rewards/chosen": -0.5103562474250793, "rewards/margins": 1.4526978731155396, "rewards/rejected": -1.9630540609359741, "step": 1790 }, { "epoch": 0.93, "learning_rate": 3.835341365461847e-07, "logits/chosen": -2.6891281604766846, "logits/rejected": -2.6219875812530518, "logps/chosen": -276.0992126464844, "logps/rejected": -229.54647827148438, "loss": 0.5346, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5371229648590088, "rewards/margins": 1.4801558256149292, "rewards/rejected": -2.0172784328460693, "step": 1800 }, { "epoch": 0.93, "learning_rate": 3.825779307707018e-07, "logits/chosen": -2.6955854892730713, "logits/rejected": -2.6945228576660156, "logps/chosen": -209.4112548828125, "logps/rejected": -230.22689819335938, "loss": 0.5242, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.609640896320343, "rewards/margins": 1.1101809740066528, "rewards/rejected": -1.7198219299316406, "step": 1810 }, { "epoch": 0.94, "learning_rate": 3.8162172499521893e-07, "logits/chosen": -2.7102739810943604, "logits/rejected": -2.7950327396392822, "logps/chosen": -267.4234619140625, "logps/rejected": -218.8787078857422, "loss": 0.5549, "rewards/accuracies": 0.75, "rewards/chosen": -0.6695321798324585, "rewards/margins": 1.3147714138031006, "rewards/rejected": -1.9843038320541382, "step": 1820 }, { "epoch": 0.94, "learning_rate": 3.8066551921973605e-07, "logits/chosen": -2.7370214462280273, "logits/rejected": -2.7310547828674316, "logps/chosen": -261.2284240722656, "logps/rejected": -237.5937957763672, "loss": 0.4986, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.37716975808143616, "rewards/margins": 1.3623695373535156, "rewards/rejected": -1.739539384841919, "step": 1830 }, { "epoch": 0.95, "learning_rate": 3.7970931344425316e-07, "logits/chosen": -2.667738437652588, "logits/rejected": -2.7235381603240967, "logps/chosen": -299.5805358886719, "logps/rejected": -240.5843505859375, "loss": 0.4653, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.5832570195198059, "rewards/margins": 1.1436893939971924, "rewards/rejected": -1.726946473121643, "step": 1840 }, { "epoch": 0.96, "learning_rate": 3.787531076687703e-07, "logits/chosen": -2.5823702812194824, "logits/rejected": -2.6130504608154297, "logps/chosen": -258.65521240234375, "logps/rejected": -211.156005859375, "loss": 0.5161, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.46967166662216187, "rewards/margins": 1.632408857345581, "rewards/rejected": -2.1020803451538086, "step": 1850 }, { "epoch": 0.96, "learning_rate": 3.7779690189328745e-07, "logits/chosen": -2.7206063270568848, "logits/rejected": -2.7099740505218506, "logps/chosen": -260.58782958984375, "logps/rejected": -236.6365203857422, "loss": 0.4783, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.6446757316589355, "rewards/margins": 1.2911062240600586, "rewards/rejected": -1.9357818365097046, "step": 1860 }, { "epoch": 0.97, "learning_rate": 3.7684069611780456e-07, "logits/chosen": -2.6849565505981445, "logits/rejected": -2.760693073272705, "logps/chosen": -289.33135986328125, "logps/rejected": -252.05615234375, "loss": 0.5354, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.3844161331653595, "rewards/margins": 1.464693546295166, "rewards/rejected": -1.8491096496582031, "step": 1870 }, { "epoch": 0.97, "learning_rate": 3.758844903423217e-07, "logits/chosen": -2.770711898803711, "logits/rejected": -2.7400715351104736, "logps/chosen": -257.98223876953125, "logps/rejected": -249.1288604736328, "loss": 0.5816, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5955790281295776, "rewards/margins": 1.141638994216919, "rewards/rejected": -1.737217903137207, "step": 1880 }, { "epoch": 0.98, "learning_rate": 3.749282845668388e-07, "logits/chosen": -2.781506061553955, "logits/rejected": -2.7431671619415283, "logps/chosen": -307.255615234375, "logps/rejected": -271.7546081542969, "loss": 0.5382, "rewards/accuracies": 0.75, "rewards/chosen": -0.7323135137557983, "rewards/margins": 1.2955814599990845, "rewards/rejected": -2.027894973754883, "step": 1890 }, { "epoch": 0.98, "learning_rate": 3.739720787913559e-07, "logits/chosen": -2.6961607933044434, "logits/rejected": -2.7294247150421143, "logps/chosen": -267.5713195800781, "logps/rejected": -234.83511352539062, "loss": 0.5006, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7748560905456543, "rewards/margins": 1.3794810771942139, "rewards/rejected": -2.1543374061584473, "step": 1900 }, { "epoch": 0.99, "learning_rate": 3.73015873015873e-07, "logits/chosen": -2.6831729412078857, "logits/rejected": -2.697099208831787, "logps/chosen": -286.3196105957031, "logps/rejected": -262.9601135253906, "loss": 0.5058, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3332211971282959, "rewards/margins": 1.1857125759124756, "rewards/rejected": -1.518933653831482, "step": 1910 }, { "epoch": 0.99, "learning_rate": 3.7205966724039014e-07, "logits/chosen": -2.5785605907440186, "logits/rejected": -2.6058080196380615, "logps/chosen": -294.5285339355469, "logps/rejected": -252.9921112060547, "loss": 0.5, "rewards/accuracies": 0.75, "rewards/chosen": -0.6818346381187439, "rewards/margins": 1.2134017944335938, "rewards/rejected": -1.895236611366272, "step": 1920 }, { "epoch": 1.0, "learning_rate": 3.711034614649072e-07, "logits/chosen": -2.751162528991699, "logits/rejected": -2.7023558616638184, "logps/chosen": -310.20550537109375, "logps/rejected": -245.3313751220703, "loss": 0.4926, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6286559700965881, "rewards/margins": 1.6120086908340454, "rewards/rejected": -2.24066424369812, "step": 1930 }, { "epoch": 1.0, "learning_rate": 3.701472556894243e-07, "logits/chosen": -2.6212620735168457, "logits/rejected": -2.65401029586792, "logps/chosen": -253.7644500732422, "logps/rejected": -285.94927978515625, "loss": 0.3858, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.09027113765478134, "rewards/margins": 2.7949814796447754, "rewards/rejected": -2.7047104835510254, "step": 1940 }, { "epoch": 1.01, "learning_rate": 3.6919104991394144e-07, "logits/chosen": -2.749549388885498, "logits/rejected": -2.7355539798736572, "logps/chosen": -262.36444091796875, "logps/rejected": -266.4080505371094, "loss": 0.0969, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.437258005142212, "rewards/margins": 5.972604751586914, "rewards/rejected": -4.535346508026123, "step": 1950 }, { "epoch": 1.01, "learning_rate": 3.6823484413845855e-07, "logits/chosen": -2.644953966140747, "logits/rejected": -2.643900156021118, "logps/chosen": -271.19232177734375, "logps/rejected": -290.2998352050781, "loss": 0.1308, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.7727033495903015, "rewards/margins": 5.208821773529053, "rewards/rejected": -4.436118125915527, "step": 1960 }, { "epoch": 1.02, "learning_rate": 3.6727863836297567e-07, "logits/chosen": -2.7043509483337402, "logits/rejected": -2.6822307109832764, "logps/chosen": -246.55972290039062, "logps/rejected": -267.2857971191406, "loss": 0.0869, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.1716320514678955, "rewards/margins": 6.148016929626465, "rewards/rejected": -4.976385116577148, "step": 1970 }, { "epoch": 1.02, "learning_rate": 3.663224325874928e-07, "logits/chosen": -2.6752538681030273, "logits/rejected": -2.640815019607544, "logps/chosen": -243.1986083984375, "logps/rejected": -281.6815185546875, "loss": 0.108, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.6313158273696899, "rewards/margins": 5.219941139221191, "rewards/rejected": -4.588625907897949, "step": 1980 }, { "epoch": 1.03, "learning_rate": 3.653662268120099e-07, "logits/chosen": -2.6890158653259277, "logits/rejected": -2.636115312576294, "logps/chosen": -250.166748046875, "logps/rejected": -315.3585510253906, "loss": 0.0855, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 1.1859726905822754, "rewards/margins": 5.5249199867248535, "rewards/rejected": -4.338947772979736, "step": 1990 }, { "epoch": 1.03, "learning_rate": 3.6441002103652707e-07, "logits/chosen": -2.6496829986572266, "logits/rejected": -2.697693347930908, "logps/chosen": -268.20355224609375, "logps/rejected": -300.24993896484375, "loss": 0.0651, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.7303746938705444, "rewards/margins": 5.9827704429626465, "rewards/rejected": -5.2523956298828125, "step": 2000 }, { "epoch": 1.03, "eval_logits/chosen": -2.7412195205688477, "eval_logits/rejected": -2.733457565307617, "eval_logps/chosen": -285.7675476074219, "eval_logps/rejected": -261.06353759765625, "eval_loss": 0.5048810243606567, "eval_rewards/accuracies": 0.7860000133514404, "eval_rewards/chosen": -1.0044103860855103, "eval_rewards/margins": 1.8658151626586914, "eval_rewards/rejected": -2.870225667953491, "eval_runtime": 453.132, "eval_samples_per_second": 4.414, "eval_steps_per_second": 0.276, "step": 2000 }, { "epoch": 1.04, "learning_rate": 3.634538152610442e-07, "logits/chosen": -2.6881024837493896, "logits/rejected": -2.7203176021575928, "logps/chosen": -273.32574462890625, "logps/rejected": -304.735107421875, "loss": 0.0758, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 1.1392464637756348, "rewards/margins": 6.492955207824707, "rewards/rejected": -5.353708744049072, "step": 2010 }, { "epoch": 1.04, "learning_rate": 3.624976094855613e-07, "logits/chosen": -2.643216609954834, "logits/rejected": -2.6377549171447754, "logps/chosen": -258.87518310546875, "logps/rejected": -269.3851318359375, "loss": 0.0707, "rewards/accuracies": 1.0, "rewards/chosen": 0.7527416944503784, "rewards/margins": 5.787278175354004, "rewards/rejected": -5.034537315368652, "step": 2020 }, { "epoch": 1.05, "learning_rate": 3.615414037100784e-07, "logits/chosen": -2.624013900756836, "logits/rejected": -2.6467807292938232, "logps/chosen": -287.6932678222656, "logps/rejected": -270.98529052734375, "loss": 0.0997, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.3131764531135559, "rewards/margins": 5.307021141052246, "rewards/rejected": -4.993845462799072, "step": 2030 }, { "epoch": 1.05, "learning_rate": 3.6058519793459553e-07, "logits/chosen": -2.583369493484497, "logits/rejected": -2.6954777240753174, "logps/chosen": -241.7144317626953, "logps/rejected": -249.5287322998047, "loss": 0.0879, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.655167818069458, "rewards/margins": 5.203488826751709, "rewards/rejected": -4.548320770263672, "step": 2040 }, { "epoch": 1.06, "learning_rate": 3.5962899215911265e-07, "logits/chosen": -2.672328472137451, "logits/rejected": -2.6205334663391113, "logps/chosen": -239.5384979248047, "logps/rejected": -288.34893798828125, "loss": 0.0544, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.5333976745605469, "rewards/margins": 5.3222761154174805, "rewards/rejected": -4.788878440856934, "step": 2050 }, { "epoch": 1.06, "learning_rate": 3.5867278638362976e-07, "logits/chosen": -2.6396572589874268, "logits/rejected": -2.728149652481079, "logps/chosen": -308.57110595703125, "logps/rejected": -312.836181640625, "loss": 0.083, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.8408229947090149, "rewards/margins": 5.805523872375488, "rewards/rejected": -4.964701175689697, "step": 2060 }, { "epoch": 1.07, "learning_rate": 3.577165806081469e-07, "logits/chosen": -2.668506145477295, "logits/rejected": -2.717451810836792, "logps/chosen": -257.14599609375, "logps/rejected": -260.1763916015625, "loss": 0.0988, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.4912979006767273, "rewards/margins": 5.213704586029053, "rewards/rejected": -4.72240686416626, "step": 2070 }, { "epoch": 1.07, "learning_rate": 3.56760374832664e-07, "logits/chosen": -2.7033512592315674, "logits/rejected": -2.676906108856201, "logps/chosen": -290.5976867675781, "logps/rejected": -317.9124450683594, "loss": 0.0423, "rewards/accuracies": 1.0, "rewards/chosen": 1.4838396310806274, "rewards/margins": 7.55858850479126, "rewards/rejected": -6.0747480392456055, "step": 2080 }, { "epoch": 1.08, "learning_rate": 3.5580416905718106e-07, "logits/chosen": -2.647249221801758, "logits/rejected": -2.7124292850494385, "logps/chosen": -305.52313232421875, "logps/rejected": -265.80853271484375, "loss": 0.1056, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.6061897277832031, "rewards/margins": 5.471752166748047, "rewards/rejected": -4.86556339263916, "step": 2090 }, { "epoch": 1.08, "learning_rate": 3.5484796328169817e-07, "logits/chosen": -2.633441686630249, "logits/rejected": -2.7382283210754395, "logps/chosen": -268.3299560546875, "logps/rejected": -253.24844360351562, "loss": 0.07, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.5186059474945068, "rewards/margins": 5.430913925170898, "rewards/rejected": -4.9123077392578125, "step": 2100 }, { "epoch": 1.09, "learning_rate": 3.538917575062153e-07, "logits/chosen": -2.594312906265259, "logits/rejected": -2.6324267387390137, "logps/chosen": -267.57550048828125, "logps/rejected": -314.03460693359375, "loss": 0.0835, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.33759838342666626, "rewards/margins": 6.359553813934326, "rewards/rejected": -6.021955490112305, "step": 2110 }, { "epoch": 1.09, "learning_rate": 3.529355517307324e-07, "logits/chosen": -2.6919217109680176, "logits/rejected": -2.6473464965820312, "logps/chosen": -247.5459747314453, "logps/rejected": -294.58917236328125, "loss": 0.0602, "rewards/accuracies": 1.0, "rewards/chosen": 0.8844423294067383, "rewards/margins": 6.708581447601318, "rewards/rejected": -5.824139595031738, "step": 2120 }, { "epoch": 1.1, "learning_rate": 3.519793459552495e-07, "logits/chosen": -2.7207422256469727, "logits/rejected": -2.644496440887451, "logps/chosen": -267.00518798828125, "logps/rejected": -298.27508544921875, "loss": 0.0747, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.5102488398551941, "rewards/margins": 5.6486406326293945, "rewards/rejected": -5.138392448425293, "step": 2130 }, { "epoch": 1.1, "learning_rate": 3.510231401797667e-07, "logits/chosen": -2.675915479660034, "logits/rejected": -2.623983860015869, "logps/chosen": -301.0896301269531, "logps/rejected": -301.5240478515625, "loss": 0.092, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.039230428636074066, "rewards/margins": 6.0254340171813965, "rewards/rejected": -6.064664363861084, "step": 2140 }, { "epoch": 1.11, "learning_rate": 3.500669344042838e-07, "logits/chosen": -2.7287375926971436, "logits/rejected": -2.673964738845825, "logps/chosen": -256.0411682128906, "logps/rejected": -286.02166748046875, "loss": 0.082, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.15862272679805756, "rewards/margins": 5.4255757331848145, "rewards/rejected": -5.584197998046875, "step": 2150 }, { "epoch": 1.12, "learning_rate": 3.491107286288009e-07, "logits/chosen": -2.7414021492004395, "logits/rejected": -2.6649553775787354, "logps/chosen": -271.9679870605469, "logps/rejected": -304.8786926269531, "loss": 0.1284, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.0953565314412117, "rewards/margins": 5.869844436645508, "rewards/rejected": -5.965200901031494, "step": 2160 }, { "epoch": 1.12, "learning_rate": 3.4815452285331803e-07, "logits/chosen": -2.659775495529175, "logits/rejected": -2.6203103065490723, "logps/chosen": -289.34613037109375, "logps/rejected": -282.0144958496094, "loss": 0.0941, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19078879058361053, "rewards/margins": 5.521025657653809, "rewards/rejected": -5.7118144035339355, "step": 2170 }, { "epoch": 1.13, "learning_rate": 3.4719831707783515e-07, "logits/chosen": -2.6245665550231934, "logits/rejected": -2.659949779510498, "logps/chosen": -300.9286193847656, "logps/rejected": -322.0133056640625, "loss": 0.0888, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.270366370677948, "rewards/margins": 6.352386474609375, "rewards/rejected": -6.082020282745361, "step": 2180 }, { "epoch": 1.13, "learning_rate": 3.4624211130235227e-07, "logits/chosen": -2.706652879714966, "logits/rejected": -2.7077231407165527, "logps/chosen": -248.646484375, "logps/rejected": -265.32366943359375, "loss": 0.085, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.16835294663906097, "rewards/margins": 5.832309246063232, "rewards/rejected": -6.000662326812744, "step": 2190 }, { "epoch": 1.14, "learning_rate": 3.452859055268694e-07, "logits/chosen": -2.6764402389526367, "logits/rejected": -2.740021228790283, "logps/chosen": -256.58892822265625, "logps/rejected": -270.4902648925781, "loss": 0.0928, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.03022139146924019, "rewards/margins": 6.150148391723633, "rewards/rejected": -6.119926929473877, "step": 2200 }, { "epoch": 1.14, "learning_rate": 3.443296997513865e-07, "logits/chosen": -2.775790214538574, "logits/rejected": -2.727541446685791, "logps/chosen": -300.582275390625, "logps/rejected": -329.0697326660156, "loss": 0.1029, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.46084627509117126, "rewards/margins": 6.592850685119629, "rewards/rejected": -6.132004261016846, "step": 2210 }, { "epoch": 1.15, "learning_rate": 3.433734939759036e-07, "logits/chosen": -2.733781337738037, "logits/rejected": -2.6996545791625977, "logps/chosen": -289.87286376953125, "logps/rejected": -335.86883544921875, "loss": 0.1133, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.7392681241035461, "rewards/margins": 6.866480350494385, "rewards/rejected": -6.127212047576904, "step": 2220 }, { "epoch": 1.15, "learning_rate": 3.4241728820042073e-07, "logits/chosen": -2.6640281677246094, "logits/rejected": -2.626359224319458, "logps/chosen": -248.7790985107422, "logps/rejected": -306.9777526855469, "loss": 0.1217, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.4639340043067932, "rewards/margins": 5.437063694000244, "rewards/rejected": -5.900998115539551, "step": 2230 }, { "epoch": 1.16, "learning_rate": 3.4146108242493784e-07, "logits/chosen": -2.7318482398986816, "logits/rejected": -2.7393126487731934, "logps/chosen": -236.71939086914062, "logps/rejected": -281.84332275390625, "loss": 0.1461, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.26268935203552246, "rewards/margins": 5.945524215698242, "rewards/rejected": -5.682834625244141, "step": 2240 }, { "epoch": 1.16, "learning_rate": 3.405048766494549e-07, "logits/chosen": -2.655292272567749, "logits/rejected": -2.6065621376037598, "logps/chosen": -292.45843505859375, "logps/rejected": -272.82489013671875, "loss": 0.1152, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.3397136926651001, "rewards/margins": 5.560425281524658, "rewards/rejected": -5.220711708068848, "step": 2250 }, { "epoch": 1.17, "learning_rate": 3.39548670873972e-07, "logits/chosen": -2.5906424522399902, "logits/rejected": -2.5299315452575684, "logps/chosen": -310.4429626464844, "logps/rejected": -342.58355712890625, "loss": 0.1157, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.9430241584777832, "rewards/margins": 7.4350996017456055, "rewards/rejected": -6.4920759201049805, "step": 2260 }, { "epoch": 1.17, "learning_rate": 3.3859246509848914e-07, "logits/chosen": -2.5732579231262207, "logits/rejected": -2.608187198638916, "logps/chosen": -274.4580993652344, "logps/rejected": -302.9696960449219, "loss": 0.0827, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.47884711623191833, "rewards/margins": 5.948543071746826, "rewards/rejected": -5.469696044921875, "step": 2270 }, { "epoch": 1.18, "learning_rate": 3.376362593230063e-07, "logits/chosen": -2.5940709114074707, "logits/rejected": -2.579684257507324, "logps/chosen": -254.5391082763672, "logps/rejected": -310.8402404785156, "loss": 0.0961, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.21051082015037537, "rewards/margins": 6.346388816833496, "rewards/rejected": -6.13587760925293, "step": 2280 }, { "epoch": 1.18, "learning_rate": 3.366800535475234e-07, "logits/chosen": -2.609266757965088, "logits/rejected": -2.4904086589813232, "logps/chosen": -291.47637939453125, "logps/rejected": -287.4329833984375, "loss": 0.0767, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.9694992899894714, "rewards/margins": 7.04742431640625, "rewards/rejected": -6.077925205230713, "step": 2290 }, { "epoch": 1.19, "learning_rate": 3.3572384777204054e-07, "logits/chosen": -2.6758570671081543, "logits/rejected": -2.6098129749298096, "logps/chosen": -292.984130859375, "logps/rejected": -281.83056640625, "loss": 0.0861, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.6002046465873718, "rewards/margins": 6.433353424072266, "rewards/rejected": -5.833148002624512, "step": 2300 }, { "epoch": 1.19, "learning_rate": 3.3476764199655765e-07, "logits/chosen": -2.49656343460083, "logits/rejected": -2.542083501815796, "logps/chosen": -214.75625610351562, "logps/rejected": -274.36273193359375, "loss": 0.0838, "rewards/accuracies": 0.9375, "rewards/chosen": 0.3019743859767914, "rewards/margins": 6.335268974304199, "rewards/rejected": -6.033293724060059, "step": 2310 }, { "epoch": 1.2, "learning_rate": 3.3381143622107477e-07, "logits/chosen": -2.5624756813049316, "logits/rejected": -2.5002315044403076, "logps/chosen": -310.534912109375, "logps/rejected": -313.9328308105469, "loss": 0.0543, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.4683504104614258, "rewards/margins": 6.916632175445557, "rewards/rejected": -6.448281288146973, "step": 2320 }, { "epoch": 1.2, "learning_rate": 3.328552304455919e-07, "logits/chosen": -2.5515811443328857, "logits/rejected": -2.5180673599243164, "logps/chosen": -260.62518310546875, "logps/rejected": -287.25152587890625, "loss": 0.073, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.19289037585258484, "rewards/margins": 6.111286163330078, "rewards/rejected": -5.918396472930908, "step": 2330 }, { "epoch": 1.21, "learning_rate": 3.31899024670109e-07, "logits/chosen": -2.5265719890594482, "logits/rejected": -2.423987865447998, "logps/chosen": -279.4831237792969, "logps/rejected": -292.02935791015625, "loss": 0.0825, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28345030546188354, "rewards/margins": 6.494320869445801, "rewards/rejected": -6.210869789123535, "step": 2340 }, { "epoch": 1.21, "learning_rate": 3.309428188946261e-07, "logits/chosen": -2.422034978866577, "logits/rejected": -2.483975410461426, "logps/chosen": -257.01348876953125, "logps/rejected": -276.59686279296875, "loss": 0.0929, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.13130250573158264, "rewards/margins": 5.311938285827637, "rewards/rejected": -5.443240165710449, "step": 2350 }, { "epoch": 1.22, "learning_rate": 3.2998661311914323e-07, "logits/chosen": -2.571733236312866, "logits/rejected": -2.4926095008850098, "logps/chosen": -268.9256591796875, "logps/rejected": -298.5318603515625, "loss": 0.0788, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.22903628647327423, "rewards/margins": 6.610577583312988, "rewards/rejected": -6.381541728973389, "step": 2360 }, { "epoch": 1.22, "learning_rate": 3.2903040734366035e-07, "logits/chosen": -2.5603127479553223, "logits/rejected": -2.584486722946167, "logps/chosen": -249.293212890625, "logps/rejected": -303.9468688964844, "loss": 0.0726, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.38736894726753235, "rewards/margins": 6.439343452453613, "rewards/rejected": -6.051974296569824, "step": 2370 }, { "epoch": 1.23, "learning_rate": 3.2807420156817746e-07, "logits/chosen": -2.4685256481170654, "logits/rejected": -2.4727115631103516, "logps/chosen": -247.81320190429688, "logps/rejected": -277.7972717285156, "loss": 0.2207, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.3868095874786377, "rewards/margins": 5.795322418212891, "rewards/rejected": -5.408513069152832, "step": 2380 }, { "epoch": 1.23, "learning_rate": 3.271179957926946e-07, "logits/chosen": -2.5883705615997314, "logits/rejected": -2.542631149291992, "logps/chosen": -282.636962890625, "logps/rejected": -294.6969299316406, "loss": 0.098, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.005316281225532293, "rewards/margins": 6.369308948516846, "rewards/rejected": -6.363993167877197, "step": 2390 }, { "epoch": 1.24, "learning_rate": 3.261617900172117e-07, "logits/chosen": -2.628528118133545, "logits/rejected": -2.605329990386963, "logps/chosen": -239.5411834716797, "logps/rejected": -289.7711181640625, "loss": 0.1672, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.6569315195083618, "rewards/margins": 5.836136817932129, "rewards/rejected": -5.179205894470215, "step": 2400 }, { "epoch": 1.24, "learning_rate": 3.2520558424172876e-07, "logits/chosen": -2.669081211090088, "logits/rejected": -2.6567091941833496, "logps/chosen": -270.3697814941406, "logps/rejected": -276.68780517578125, "loss": 0.0625, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.47786712646484375, "rewards/margins": 6.1428542137146, "rewards/rejected": -5.664987564086914, "step": 2410 }, { "epoch": 1.25, "learning_rate": 3.242493784662459e-07, "logits/chosen": -2.595695972442627, "logits/rejected": -2.6479713916778564, "logps/chosen": -257.28521728515625, "logps/rejected": -289.1241760253906, "loss": 0.1023, "rewards/accuracies": 0.9375, "rewards/chosen": 0.023120785132050514, "rewards/margins": 6.379412651062012, "rewards/rejected": -6.356291770935059, "step": 2420 }, { "epoch": 1.25, "learning_rate": 3.2329317269076304e-07, "logits/chosen": -2.633089542388916, "logits/rejected": -2.5774524211883545, "logps/chosen": -285.16400146484375, "logps/rejected": -334.11822509765625, "loss": 0.1087, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.5232380628585815, "rewards/margins": 6.513851165771484, "rewards/rejected": -5.990612983703613, "step": 2430 }, { "epoch": 1.26, "learning_rate": 3.2233696691528016e-07, "logits/chosen": -2.5757319927215576, "logits/rejected": -2.551025152206421, "logps/chosen": -255.3240509033203, "logps/rejected": -311.89337158203125, "loss": 0.1209, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.6545261144638062, "rewards/margins": 6.234076023101807, "rewards/rejected": -5.579549789428711, "step": 2440 }, { "epoch": 1.26, "learning_rate": 3.2138076113979727e-07, "logits/chosen": -2.6312050819396973, "logits/rejected": -2.539020538330078, "logps/chosen": -278.7816162109375, "logps/rejected": -318.1822509765625, "loss": 0.0936, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.04061394929885864, "rewards/margins": 6.488400936126709, "rewards/rejected": -6.447786808013916, "step": 2450 }, { "epoch": 1.27, "learning_rate": 3.204245553643144e-07, "logits/chosen": -2.5656325817108154, "logits/rejected": -2.5055129528045654, "logps/chosen": -309.80218505859375, "logps/rejected": -306.3583679199219, "loss": 0.0857, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.1567656546831131, "rewards/margins": 5.968165397644043, "rewards/rejected": -5.811399936676025, "step": 2460 }, { "epoch": 1.28, "learning_rate": 3.194683495888315e-07, "logits/chosen": -2.6194345951080322, "logits/rejected": -2.524545431137085, "logps/chosen": -304.95233154296875, "logps/rejected": -278.0549621582031, "loss": 0.1175, "rewards/accuracies": 0.9375, "rewards/chosen": 0.33243006467819214, "rewards/margins": 5.192269802093506, "rewards/rejected": -4.85983943939209, "step": 2470 }, { "epoch": 1.28, "learning_rate": 3.185121438133486e-07, "logits/chosen": -2.5585827827453613, "logits/rejected": -2.6347527503967285, "logps/chosen": -282.5390625, "logps/rejected": -360.55792236328125, "loss": 0.086, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.7870206832885742, "rewards/margins": 7.211942195892334, "rewards/rejected": -6.424921989440918, "step": 2480 }, { "epoch": 1.29, "learning_rate": 3.1755593803786574e-07, "logits/chosen": -2.5586822032928467, "logits/rejected": -2.5810511112213135, "logps/chosen": -228.8177490234375, "logps/rejected": -280.386962890625, "loss": 0.1051, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.21440930664539337, "rewards/margins": 5.810070991516113, "rewards/rejected": -6.02448034286499, "step": 2490 }, { "epoch": 1.29, "learning_rate": 3.1659973226238285e-07, "logits/chosen": -2.516174793243408, "logits/rejected": -2.523841381072998, "logps/chosen": -278.87884521484375, "logps/rejected": -282.09747314453125, "loss": 0.0887, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.04227827861905098, "rewards/margins": 6.174509048461914, "rewards/rejected": -6.132230281829834, "step": 2500 }, { "epoch": 1.29, "eval_logits/chosen": -2.6173453330993652, "eval_logits/rejected": -2.5939838886260986, "eval_logps/chosen": -295.6112976074219, "eval_logps/rejected": -271.6174621582031, "eval_loss": 0.5945637226104736, "eval_rewards/accuracies": 0.7480000257492065, "eval_rewards/chosen": -1.9887875318527222, "eval_rewards/margins": 1.9368335008621216, "eval_rewards/rejected": -3.925621271133423, "eval_runtime": 453.7931, "eval_samples_per_second": 4.407, "eval_steps_per_second": 0.275, "step": 2500 }, { "epoch": 1.3, "learning_rate": 3.1564352648689997e-07, "logits/chosen": -2.6018643379211426, "logits/rejected": -2.6282429695129395, "logps/chosen": -302.58746337890625, "logps/rejected": -285.2703857421875, "loss": 0.0985, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.127798393368721, "rewards/margins": 5.529982089996338, "rewards/rejected": -5.40218448638916, "step": 2510 }, { "epoch": 1.3, "learning_rate": 3.146873207114171e-07, "logits/chosen": -2.5892491340637207, "logits/rejected": -2.5546836853027344, "logps/chosen": -241.8780975341797, "logps/rejected": -288.4834899902344, "loss": 0.1053, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.20376138389110565, "rewards/margins": 6.397191047668457, "rewards/rejected": -6.193429946899414, "step": 2520 }, { "epoch": 1.31, "learning_rate": 3.137311149359342e-07, "logits/chosen": -2.653477191925049, "logits/rejected": -2.6052021980285645, "logps/chosen": -312.23828125, "logps/rejected": -331.75006103515625, "loss": 0.0965, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2199176847934723, "rewards/margins": 5.913000106811523, "rewards/rejected": -5.693081855773926, "step": 2530 }, { "epoch": 1.31, "learning_rate": 3.127749091604513e-07, "logits/chosen": -2.583885669708252, "logits/rejected": -2.624443531036377, "logps/chosen": -303.43658447265625, "logps/rejected": -341.8377380371094, "loss": 0.0916, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.8230945467948914, "rewards/margins": 8.061508178710938, "rewards/rejected": -7.2384138107299805, "step": 2540 }, { "epoch": 1.32, "learning_rate": 3.1181870338496843e-07, "logits/chosen": -2.5278661251068115, "logits/rejected": -2.5652339458465576, "logps/chosen": -211.1104278564453, "logps/rejected": -290.1295471191406, "loss": 0.1534, "rewards/accuracies": 1.0, "rewards/chosen": 0.07069828361272812, "rewards/margins": 5.71384859085083, "rewards/rejected": -5.643150806427002, "step": 2550 }, { "epoch": 1.32, "learning_rate": 3.108624976094856e-07, "logits/chosen": -2.585292339324951, "logits/rejected": -2.625404119491577, "logps/chosen": -264.1541442871094, "logps/rejected": -261.3079833984375, "loss": 0.1008, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.9801958799362183, "rewards/margins": 5.846863746643066, "rewards/rejected": -4.866667747497559, "step": 2560 }, { "epoch": 1.33, "learning_rate": 3.0990629183400266e-07, "logits/chosen": -2.601198196411133, "logits/rejected": -2.5282673835754395, "logps/chosen": -289.15863037109375, "logps/rejected": -304.9074401855469, "loss": 0.1013, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.5822550654411316, "rewards/margins": 6.540297508239746, "rewards/rejected": -5.958043098449707, "step": 2570 }, { "epoch": 1.33, "learning_rate": 3.089500860585198e-07, "logits/chosen": -2.5145509243011475, "logits/rejected": -2.5709242820739746, "logps/chosen": -233.1308135986328, "logps/rejected": -259.27081298828125, "loss": 0.109, "rewards/accuracies": 0.9375, "rewards/chosen": -0.19086940586566925, "rewards/margins": 5.136707305908203, "rewards/rejected": -5.327577114105225, "step": 2580 }, { "epoch": 1.34, "learning_rate": 3.079938802830369e-07, "logits/chosen": -2.5653598308563232, "logits/rejected": -2.6376378536224365, "logps/chosen": -255.4644775390625, "logps/rejected": -230.2383575439453, "loss": 0.073, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.03477507829666138, "rewards/margins": 5.211745262145996, "rewards/rejected": -5.246520519256592, "step": 2590 }, { "epoch": 1.34, "learning_rate": 3.07037674507554e-07, "logits/chosen": -2.5367038249969482, "logits/rejected": -2.5960888862609863, "logps/chosen": -300.2066345214844, "logps/rejected": -349.10064697265625, "loss": 0.1305, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.460475355386734, "rewards/margins": 6.672016143798828, "rewards/rejected": -6.211541175842285, "step": 2600 }, { "epoch": 1.35, "learning_rate": 3.060814687320711e-07, "logits/chosen": -2.5328152179718018, "logits/rejected": -2.514622688293457, "logps/chosen": -292.62896728515625, "logps/rejected": -287.25946044921875, "loss": 0.0986, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.45202842354774475, "rewards/margins": 6.34035587310791, "rewards/rejected": -5.888327598571777, "step": 2610 }, { "epoch": 1.35, "learning_rate": 3.0512526295658824e-07, "logits/chosen": -2.5849661827087402, "logits/rejected": -2.564257860183716, "logps/chosen": -290.31890869140625, "logps/rejected": -296.34967041015625, "loss": 0.1226, "rewards/accuracies": 1.0, "rewards/chosen": 0.20002314448356628, "rewards/margins": 5.983278751373291, "rewards/rejected": -5.783255100250244, "step": 2620 }, { "epoch": 1.36, "learning_rate": 3.0416905718110536e-07, "logits/chosen": -2.567880630493164, "logits/rejected": -2.539358615875244, "logps/chosen": -247.0207977294922, "logps/rejected": -269.39459228515625, "loss": 0.131, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.034124452620744705, "rewards/margins": 6.358598709106445, "rewards/rejected": -6.324474334716797, "step": 2630 }, { "epoch": 1.36, "learning_rate": 3.0321285140562247e-07, "logits/chosen": -2.608093738555908, "logits/rejected": -2.5030605792999268, "logps/chosen": -249.52786254882812, "logps/rejected": -272.12176513671875, "loss": 0.1505, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.8648720979690552, "rewards/margins": 5.290918350219727, "rewards/rejected": -6.155789375305176, "step": 2640 }, { "epoch": 1.37, "learning_rate": 3.022566456301396e-07, "logits/chosen": -2.6363885402679443, "logits/rejected": -2.7040047645568848, "logps/chosen": -264.7932434082031, "logps/rejected": -294.1228942871094, "loss": 0.1816, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0637885183095932, "rewards/margins": 6.300909042358398, "rewards/rejected": -6.364696979522705, "step": 2650 }, { "epoch": 1.37, "learning_rate": 3.013004398546567e-07, "logits/chosen": -2.542020797729492, "logits/rejected": -2.5166268348693848, "logps/chosen": -271.88916015625, "logps/rejected": -299.7735595703125, "loss": 0.1132, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3094290792942047, "rewards/margins": 5.991552352905273, "rewards/rejected": -6.300982475280762, "step": 2660 }, { "epoch": 1.38, "learning_rate": 3.003442340791738e-07, "logits/chosen": -2.6782054901123047, "logits/rejected": -2.670994758605957, "logps/chosen": -265.01751708984375, "logps/rejected": -290.5347900390625, "loss": 0.0778, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.39196690917015076, "rewards/margins": 6.586050510406494, "rewards/rejected": -6.9780168533325195, "step": 2670 }, { "epoch": 1.38, "learning_rate": 2.9938802830369093e-07, "logits/chosen": -2.668478488922119, "logits/rejected": -2.671940803527832, "logps/chosen": -266.77191162109375, "logps/rejected": -299.1286926269531, "loss": 0.104, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.42280688881874084, "rewards/margins": 5.9002275466918945, "rewards/rejected": -6.323034763336182, "step": 2680 }, { "epoch": 1.39, "learning_rate": 2.9843182252820805e-07, "logits/chosen": -2.507829189300537, "logits/rejected": -2.517129421234131, "logps/chosen": -252.4044189453125, "logps/rejected": -290.12060546875, "loss": 0.074, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.012675967998802662, "rewards/margins": 6.789121150970459, "rewards/rejected": -6.801795959472656, "step": 2690 }, { "epoch": 1.39, "learning_rate": 2.974756167527252e-07, "logits/chosen": -2.6370203495025635, "logits/rejected": -2.5525128841400146, "logps/chosen": -240.90438842773438, "logps/rejected": -289.1092224121094, "loss": 0.0887, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.09208206832408905, "rewards/margins": 5.901047706604004, "rewards/rejected": -5.993129253387451, "step": 2700 }, { "epoch": 1.4, "learning_rate": 2.9651941097724233e-07, "logits/chosen": -2.576045513153076, "logits/rejected": -2.587123394012451, "logps/chosen": -280.9703369140625, "logps/rejected": -267.0768127441406, "loss": 0.115, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.17211000621318817, "rewards/margins": 5.533499717712402, "rewards/rejected": -5.361390113830566, "step": 2710 }, { "epoch": 1.4, "learning_rate": 2.9556320520175945e-07, "logits/chosen": -2.4903769493103027, "logits/rejected": -2.542959213256836, "logps/chosen": -272.4198913574219, "logps/rejected": -274.2657775878906, "loss": 0.1448, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.25773993134498596, "rewards/margins": 5.519182205200195, "rewards/rejected": -5.776921272277832, "step": 2720 }, { "epoch": 1.41, "learning_rate": 2.946069994262765e-07, "logits/chosen": -2.641730546951294, "logits/rejected": -2.5455493927001953, "logps/chosen": -292.0690612792969, "logps/rejected": -322.41717529296875, "loss": 0.1071, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.45149484276771545, "rewards/margins": 6.424929141998291, "rewards/rejected": -5.973433971405029, "step": 2730 }, { "epoch": 1.41, "learning_rate": 2.9365079365079363e-07, "logits/chosen": -2.602846622467041, "logits/rejected": -2.6123135089874268, "logps/chosen": -280.2417907714844, "logps/rejected": -258.2378234863281, "loss": 0.1074, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2893966734409332, "rewards/margins": 6.021862983703613, "rewards/rejected": -5.732466697692871, "step": 2740 }, { "epoch": 1.42, "learning_rate": 2.9269458787531074e-07, "logits/chosen": -2.4980530738830566, "logits/rejected": -2.4847359657287598, "logps/chosen": -241.2249755859375, "logps/rejected": -305.1526794433594, "loss": 0.0899, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.004615753889083862, "rewards/margins": 5.619439601898193, "rewards/rejected": -5.6240553855896, "step": 2750 }, { "epoch": 1.42, "learning_rate": 2.9173838209982786e-07, "logits/chosen": -2.618914842605591, "logits/rejected": -2.564487934112549, "logps/chosen": -264.39581298828125, "logps/rejected": -246.0105438232422, "loss": 0.0961, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.016721582040190697, "rewards/margins": 5.4498291015625, "rewards/rejected": -5.466550827026367, "step": 2760 }, { "epoch": 1.43, "learning_rate": 2.90782176324345e-07, "logits/chosen": -2.517812490463257, "logits/rejected": -2.5693249702453613, "logps/chosen": -340.0567932128906, "logps/rejected": -288.3482360839844, "loss": 0.0991, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.5332422256469727, "rewards/margins": 6.4156060218811035, "rewards/rejected": -5.882363319396973, "step": 2770 }, { "epoch": 1.44, "learning_rate": 2.898259705488621e-07, "logits/chosen": -2.7078089714050293, "logits/rejected": -2.634216070175171, "logps/chosen": -264.4534606933594, "logps/rejected": -270.3084411621094, "loss": 0.0881, "rewards/accuracies": 1.0, "rewards/chosen": 0.09578968584537506, "rewards/margins": 5.951535224914551, "rewards/rejected": -5.855745315551758, "step": 2780 }, { "epoch": 1.44, "learning_rate": 2.888697647733792e-07, "logits/chosen": -2.6018433570861816, "logits/rejected": -2.5431325435638428, "logps/chosen": -325.04339599609375, "logps/rejected": -321.00384521484375, "loss": 0.0834, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.6666939854621887, "rewards/margins": 7.204258918762207, "rewards/rejected": -6.537564277648926, "step": 2790 }, { "epoch": 1.45, "learning_rate": 2.879135589978963e-07, "logits/chosen": -2.544001340866089, "logits/rejected": -2.519418954849243, "logps/chosen": -244.0146484375, "logps/rejected": -299.86334228515625, "loss": 0.0933, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.21346421539783478, "rewards/margins": 5.652622222900391, "rewards/rejected": -5.439157962799072, "step": 2800 }, { "epoch": 1.45, "learning_rate": 2.8695735322241344e-07, "logits/chosen": -2.5885682106018066, "logits/rejected": -2.5310218334198, "logps/chosen": -282.88092041015625, "logps/rejected": -294.2001647949219, "loss": 0.0818, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.4320918917655945, "rewards/margins": 6.334897518157959, "rewards/rejected": -5.902805328369141, "step": 2810 }, { "epoch": 1.46, "learning_rate": 2.8600114744693055e-07, "logits/chosen": -2.4299087524414062, "logits/rejected": -2.4698925018310547, "logps/chosen": -251.56283569335938, "logps/rejected": -303.685546875, "loss": 0.0708, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.48298412561416626, "rewards/margins": 6.807847023010254, "rewards/rejected": -6.324862480163574, "step": 2820 }, { "epoch": 1.46, "learning_rate": 2.8504494167144767e-07, "logits/chosen": -2.4746925830841064, "logits/rejected": -2.4473392963409424, "logps/chosen": -267.5787048339844, "logps/rejected": -305.0351867675781, "loss": 0.1244, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.123781718313694, "rewards/margins": 6.507838249206543, "rewards/rejected": -6.384057521820068, "step": 2830 }, { "epoch": 1.47, "learning_rate": 2.8408873589596484e-07, "logits/chosen": -2.513401985168457, "logits/rejected": -2.4804389476776123, "logps/chosen": -263.47601318359375, "logps/rejected": -278.4566345214844, "loss": 0.0877, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.06293614208698273, "rewards/margins": 6.116005897521973, "rewards/rejected": -6.053069591522217, "step": 2840 }, { "epoch": 1.47, "learning_rate": 2.8313253012048195e-07, "logits/chosen": -2.317784547805786, "logits/rejected": -2.4009079933166504, "logps/chosen": -243.90869140625, "logps/rejected": -322.0810546875, "loss": 0.1898, "rewards/accuracies": 0.9375, "rewards/chosen": 0.003992212004959583, "rewards/margins": 6.088549613952637, "rewards/rejected": -6.084556579589844, "step": 2850 }, { "epoch": 1.48, "learning_rate": 2.8217632434499907e-07, "logits/chosen": -2.4719886779785156, "logits/rejected": -2.3840715885162354, "logps/chosen": -293.1739501953125, "logps/rejected": -318.64990234375, "loss": 0.0858, "rewards/accuracies": 1.0, "rewards/chosen": -0.09377191215753555, "rewards/margins": 6.726716041564941, "rewards/rejected": -6.820487976074219, "step": 2860 }, { "epoch": 1.48, "learning_rate": 2.812201185695162e-07, "logits/chosen": -2.4654247760772705, "logits/rejected": -2.4648938179016113, "logps/chosen": -238.2162322998047, "logps/rejected": -299.6322937011719, "loss": 0.0761, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.07485105842351913, "rewards/margins": 5.973660469055176, "rewards/rejected": -5.898809909820557, "step": 2870 }, { "epoch": 1.49, "learning_rate": 2.802639127940333e-07, "logits/chosen": -2.512289047241211, "logits/rejected": -2.473268985748291, "logps/chosen": -267.739990234375, "logps/rejected": -249.61135864257812, "loss": 0.0935, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15142297744750977, "rewards/margins": 5.871346950531006, "rewards/rejected": -6.022769927978516, "step": 2880 }, { "epoch": 1.49, "learning_rate": 2.7930770701855036e-07, "logits/chosen": -2.570551872253418, "logits/rejected": -2.468529224395752, "logps/chosen": -271.27410888671875, "logps/rejected": -282.323974609375, "loss": 0.1277, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.5963325500488281, "rewards/margins": 7.158364295959473, "rewards/rejected": -6.562032222747803, "step": 2890 }, { "epoch": 1.5, "learning_rate": 2.783515012430675e-07, "logits/chosen": -2.406557559967041, "logits/rejected": -2.4173083305358887, "logps/chosen": -268.2488708496094, "logps/rejected": -279.4272155761719, "loss": 0.1144, "rewards/accuracies": 0.9375, "rewards/chosen": 0.4366177022457123, "rewards/margins": 6.323246955871582, "rewards/rejected": -5.886629581451416, "step": 2900 }, { "epoch": 1.5, "learning_rate": 2.773952954675846e-07, "logits/chosen": -2.4727766513824463, "logits/rejected": -2.4939932823181152, "logps/chosen": -266.2540283203125, "logps/rejected": -250.78445434570312, "loss": 0.0947, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.21414819359779358, "rewards/margins": 5.781444549560547, "rewards/rejected": -5.567296504974365, "step": 2910 }, { "epoch": 1.51, "learning_rate": 2.764390896921017e-07, "logits/chosen": -2.4799656867980957, "logits/rejected": -2.466733932495117, "logps/chosen": -298.5470275878906, "logps/rejected": -284.695556640625, "loss": 0.0868, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.38546982407569885, "rewards/margins": 6.353014945983887, "rewards/rejected": -5.967545509338379, "step": 2920 }, { "epoch": 1.51, "learning_rate": 2.754828839166188e-07, "logits/chosen": -2.458850383758545, "logits/rejected": -2.3630478382110596, "logps/chosen": -259.45361328125, "logps/rejected": -270.87664794921875, "loss": 0.0972, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.5138736367225647, "rewards/margins": 7.339796543121338, "rewards/rejected": -6.82592248916626, "step": 2930 }, { "epoch": 1.52, "learning_rate": 2.7452667814113594e-07, "logits/chosen": -2.532984495162964, "logits/rejected": -2.6302952766418457, "logps/chosen": -287.7354431152344, "logps/rejected": -295.53240966796875, "loss": 0.0902, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.5111293196678162, "rewards/margins": 6.166231155395508, "rewards/rejected": -5.655101776123047, "step": 2940 }, { "epoch": 1.52, "learning_rate": 2.7357047236565306e-07, "logits/chosen": -2.53074312210083, "logits/rejected": -2.509186267852783, "logps/chosen": -256.3830261230469, "logps/rejected": -287.47113037109375, "loss": 0.0582, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.4252433180809021, "rewards/margins": 6.003683090209961, "rewards/rejected": -5.578439712524414, "step": 2950 }, { "epoch": 1.53, "learning_rate": 2.7261426659017017e-07, "logits/chosen": -2.465263843536377, "logits/rejected": -2.441957950592041, "logps/chosen": -259.81170654296875, "logps/rejected": -287.27825927734375, "loss": 0.1006, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.16452935338020325, "rewards/margins": 6.049590110778809, "rewards/rejected": -5.885060787200928, "step": 2960 }, { "epoch": 1.53, "learning_rate": 2.716580608146873e-07, "logits/chosen": -2.467651605606079, "logits/rejected": -2.4969096183776855, "logps/chosen": -325.9266662597656, "logps/rejected": -307.8146057128906, "loss": 0.0931, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.5208323001861572, "rewards/margins": 7.010321140289307, "rewards/rejected": -6.489488124847412, "step": 2970 }, { "epoch": 1.54, "learning_rate": 2.7070185503920446e-07, "logits/chosen": -2.297741174697876, "logits/rejected": -2.342559576034546, "logps/chosen": -260.2572937011719, "logps/rejected": -300.3307800292969, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": 0.3879585862159729, "rewards/margins": 6.9699883460998535, "rewards/rejected": -6.582029819488525, "step": 2980 }, { "epoch": 1.54, "learning_rate": 2.6974564926372157e-07, "logits/chosen": -2.491931438446045, "logits/rejected": -2.3935139179229736, "logps/chosen": -290.97320556640625, "logps/rejected": -317.1175231933594, "loss": 0.063, "rewards/accuracies": 1.0, "rewards/chosen": 0.28951650857925415, "rewards/margins": 7.234450340270996, "rewards/rejected": -6.944933891296387, "step": 2990 }, { "epoch": 1.55, "learning_rate": 2.687894434882387e-07, "logits/chosen": -2.462905168533325, "logits/rejected": -2.491457223892212, "logps/chosen": -242.7196502685547, "logps/rejected": -251.0511932373047, "loss": 0.0747, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3230946958065033, "rewards/margins": 5.371783256530762, "rewards/rejected": -5.694878578186035, "step": 3000 }, { "epoch": 1.55, "eval_logits/chosen": -2.520482063293457, "eval_logits/rejected": -2.49690580368042, "eval_logps/chosen": -295.31353759765625, "eval_logps/rejected": -272.6327209472656, "eval_loss": 0.5747828483581543, "eval_rewards/accuracies": 0.7559999823570251, "eval_rewards/chosen": -1.9590092897415161, "eval_rewards/margins": 2.068134307861328, "eval_rewards/rejected": -4.027143955230713, "eval_runtime": 454.0222, "eval_samples_per_second": 4.405, "eval_steps_per_second": 0.275, "step": 3000 }, { "epoch": 1.55, "learning_rate": 2.678332377127558e-07, "logits/chosen": -2.430216073989868, "logits/rejected": -2.52648663520813, "logps/chosen": -302.43536376953125, "logps/rejected": -307.7148742675781, "loss": 0.0768, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.5085604786872864, "rewards/margins": 7.694282531738281, "rewards/rejected": -7.185723304748535, "step": 3010 }, { "epoch": 1.56, "learning_rate": 2.668770319372729e-07, "logits/chosen": -2.472560167312622, "logits/rejected": -2.4429688453674316, "logps/chosen": -269.9737243652344, "logps/rejected": -255.141845703125, "loss": 0.079, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.05735977366566658, "rewards/margins": 5.4194865226745605, "rewards/rejected": -5.362127304077148, "step": 3020 }, { "epoch": 1.56, "learning_rate": 2.6592082616179004e-07, "logits/chosen": -2.4499270915985107, "logits/rejected": -2.321331739425659, "logps/chosen": -199.5952911376953, "logps/rejected": -271.1117248535156, "loss": 0.0957, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.10843601077795029, "rewards/margins": 6.051784992218018, "rewards/rejected": -5.9433488845825195, "step": 3030 }, { "epoch": 1.57, "learning_rate": 2.649646203863071e-07, "logits/chosen": -2.5263118743896484, "logits/rejected": -2.501732110977173, "logps/chosen": -257.1748046875, "logps/rejected": -306.52545166015625, "loss": 0.0858, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.3991379737854004, "rewards/margins": 6.244803428649902, "rewards/rejected": -5.845664978027344, "step": 3040 }, { "epoch": 1.57, "learning_rate": 2.640084146108242e-07, "logits/chosen": -2.5237767696380615, "logits/rejected": -2.5036301612854004, "logps/chosen": -305.75408935546875, "logps/rejected": -300.43505859375, "loss": 0.0738, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2263985425233841, "rewards/margins": 7.201443672180176, "rewards/rejected": -6.975044250488281, "step": 3050 }, { "epoch": 1.58, "learning_rate": 2.6305220883534133e-07, "logits/chosen": -2.4949545860290527, "logits/rejected": -2.5045151710510254, "logps/chosen": -239.982666015625, "logps/rejected": -278.0937805175781, "loss": 0.0746, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.044704683125019073, "rewards/margins": 5.782896995544434, "rewards/rejected": -5.738192558288574, "step": 3060 }, { "epoch": 1.58, "learning_rate": 2.6209600305985845e-07, "logits/chosen": -2.5282607078552246, "logits/rejected": -2.5183358192443848, "logps/chosen": -278.13458251953125, "logps/rejected": -305.0899963378906, "loss": 0.1216, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.6105505228042603, "rewards/margins": 7.445713996887207, "rewards/rejected": -6.8351640701293945, "step": 3070 }, { "epoch": 1.59, "learning_rate": 2.6113979728437556e-07, "logits/chosen": -2.641510486602783, "logits/rejected": -2.635889768600464, "logps/chosen": -302.23468017578125, "logps/rejected": -289.52545166015625, "loss": 0.0945, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28618529438972473, "rewards/margins": 6.0523362159729, "rewards/rejected": -5.766150951385498, "step": 3080 }, { "epoch": 1.6, "learning_rate": 2.601835915088927e-07, "logits/chosen": -2.5400562286376953, "logits/rejected": -2.446110486984253, "logps/chosen": -238.501220703125, "logps/rejected": -286.86163330078125, "loss": 0.0957, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2534426152706146, "rewards/margins": 6.206382751464844, "rewards/rejected": -5.952939510345459, "step": 3090 }, { "epoch": 1.6, "learning_rate": 2.592273857334098e-07, "logits/chosen": -2.6041581630706787, "logits/rejected": -2.4704174995422363, "logps/chosen": -260.03790283203125, "logps/rejected": -295.0962829589844, "loss": 0.0959, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.3247249722480774, "rewards/margins": 6.418676853179932, "rewards/rejected": -6.09395170211792, "step": 3100 }, { "epoch": 1.61, "learning_rate": 2.582711799579269e-07, "logits/chosen": -2.6691956520080566, "logits/rejected": -2.6473820209503174, "logps/chosen": -294.5990905761719, "logps/rejected": -310.1783142089844, "loss": 0.0778, "rewards/accuracies": 0.9375, "rewards/chosen": 0.013369923457503319, "rewards/margins": 6.645904541015625, "rewards/rejected": -6.632534027099609, "step": 3110 }, { "epoch": 1.61, "learning_rate": 2.573149741824441e-07, "logits/chosen": -2.5838215351104736, "logits/rejected": -2.569681167602539, "logps/chosen": -309.8135681152344, "logps/rejected": -293.36224365234375, "loss": 0.0819, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.18596038222312927, "rewards/margins": 6.355560302734375, "rewards/rejected": -6.169599533081055, "step": 3120 }, { "epoch": 1.62, "learning_rate": 2.563587684069612e-07, "logits/chosen": -2.5741264820098877, "logits/rejected": -2.5943167209625244, "logps/chosen": -303.80975341796875, "logps/rejected": -300.5847473144531, "loss": 0.0687, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.188811257481575, "rewards/margins": 6.232892036437988, "rewards/rejected": -6.421704292297363, "step": 3130 }, { "epoch": 1.62, "learning_rate": 2.554025626314783e-07, "logits/chosen": -2.476935863494873, "logits/rejected": -2.5481014251708984, "logps/chosen": -276.67401123046875, "logps/rejected": -311.7854919433594, "loss": 0.0848, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5916802883148193, "rewards/margins": 6.4489240646362305, "rewards/rejected": -7.040605068206787, "step": 3140 }, { "epoch": 1.63, "learning_rate": 2.544463568559954e-07, "logits/chosen": -2.555737018585205, "logits/rejected": -2.5703513622283936, "logps/chosen": -313.1715393066406, "logps/rejected": -320.77484130859375, "loss": 0.0695, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.053133077919483185, "rewards/margins": 6.832341194152832, "rewards/rejected": -6.779207706451416, "step": 3150 }, { "epoch": 1.63, "learning_rate": 2.5349015108051254e-07, "logits/chosen": -2.426164150238037, "logits/rejected": -2.5023982524871826, "logps/chosen": -259.065673828125, "logps/rejected": -274.95361328125, "loss": 0.0805, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.08334522694349289, "rewards/margins": 6.496880531311035, "rewards/rejected": -6.580225467681885, "step": 3160 }, { "epoch": 1.64, "learning_rate": 2.5253394530502966e-07, "logits/chosen": -2.479907512664795, "logits/rejected": -2.4037084579467773, "logps/chosen": -331.0310363769531, "logps/rejected": -324.9789733886719, "loss": 0.0667, "rewards/accuracies": 0.9375, "rewards/chosen": 0.15661577880382538, "rewards/margins": 6.679041862487793, "rewards/rejected": -6.522425651550293, "step": 3170 }, { "epoch": 1.64, "learning_rate": 2.5157773952954677e-07, "logits/chosen": -2.610734462738037, "logits/rejected": -2.492011308670044, "logps/chosen": -306.02264404296875, "logps/rejected": -303.4534912109375, "loss": 0.0959, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.17716434597969055, "rewards/margins": 6.456340789794922, "rewards/rejected": -6.279176235198975, "step": 3180 }, { "epoch": 1.65, "learning_rate": 2.506215337540639e-07, "logits/chosen": -2.574583053588867, "logits/rejected": -2.423633575439453, "logps/chosen": -261.4005126953125, "logps/rejected": -308.74761962890625, "loss": 0.1468, "rewards/accuracies": 1.0, "rewards/chosen": 0.08365867286920547, "rewards/margins": 6.674959659576416, "rewards/rejected": -6.591300010681152, "step": 3190 }, { "epoch": 1.65, "learning_rate": 2.4966532797858095e-07, "logits/chosen": -2.4675204753875732, "logits/rejected": -2.391970157623291, "logps/chosen": -287.25286865234375, "logps/rejected": -287.6166687011719, "loss": 0.1103, "rewards/accuracies": 0.9375, "rewards/chosen": 0.16912779211997986, "rewards/margins": 6.987713813781738, "rewards/rejected": -6.818585395812988, "step": 3200 }, { "epoch": 1.66, "learning_rate": 2.4870912220309807e-07, "logits/chosen": -2.4692444801330566, "logits/rejected": -2.492424488067627, "logps/chosen": -284.3822326660156, "logps/rejected": -278.57940673828125, "loss": 0.0819, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.0931725949048996, "rewards/margins": 5.680792331695557, "rewards/rejected": -5.587619304656982, "step": 3210 }, { "epoch": 1.66, "learning_rate": 2.477529164276152e-07, "logits/chosen": -2.5302493572235107, "logits/rejected": -2.5462918281555176, "logps/chosen": -274.50482177734375, "logps/rejected": -305.86639404296875, "loss": 0.1224, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.22570249438285828, "rewards/margins": 6.788195610046387, "rewards/rejected": -6.562493324279785, "step": 3220 }, { "epoch": 1.67, "learning_rate": 2.4679671065213235e-07, "logits/chosen": -2.4377293586730957, "logits/rejected": -2.495702028274536, "logps/chosen": -284.8072204589844, "logps/rejected": -318.3940734863281, "loss": 0.1224, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27616390585899353, "rewards/margins": 6.491647243499756, "rewards/rejected": -6.215483665466309, "step": 3230 }, { "epoch": 1.67, "learning_rate": 2.4584050487664947e-07, "logits/chosen": -2.43880295753479, "logits/rejected": -2.5362446308135986, "logps/chosen": -283.766845703125, "logps/rejected": -288.78924560546875, "loss": 0.065, "rewards/accuracies": 1.0, "rewards/chosen": 0.6184531450271606, "rewards/margins": 7.084000587463379, "rewards/rejected": -6.46554708480835, "step": 3240 }, { "epoch": 1.68, "learning_rate": 2.448842991011666e-07, "logits/chosen": -2.446498394012451, "logits/rejected": -2.405273199081421, "logps/chosen": -293.029052734375, "logps/rejected": -291.66790771484375, "loss": 0.1185, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1758905053138733, "rewards/margins": 5.736870288848877, "rewards/rejected": -5.9127607345581055, "step": 3250 }, { "epoch": 1.68, "learning_rate": 2.439280933256837e-07, "logits/chosen": -2.4379031658172607, "logits/rejected": -2.4364233016967773, "logps/chosen": -242.67715454101562, "logps/rejected": -309.957275390625, "loss": 0.0544, "rewards/accuracies": 1.0, "rewards/chosen": 0.2026246041059494, "rewards/margins": 6.98794412612915, "rewards/rejected": -6.7853193283081055, "step": 3260 }, { "epoch": 1.69, "learning_rate": 2.429718875502008e-07, "logits/chosen": -2.330289602279663, "logits/rejected": -2.3436808586120605, "logps/chosen": -273.34246826171875, "logps/rejected": -333.4544372558594, "loss": 0.0806, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07439390569925308, "rewards/margins": 6.902178764343262, "rewards/rejected": -6.9765729904174805, "step": 3270 }, { "epoch": 1.69, "learning_rate": 2.420156817747179e-07, "logits/chosen": -2.3813562393188477, "logits/rejected": -2.4932188987731934, "logps/chosen": -278.9999084472656, "logps/rejected": -269.7981872558594, "loss": 0.059, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.24793286621570587, "rewards/margins": 6.85955286026001, "rewards/rejected": -6.611619472503662, "step": 3280 }, { "epoch": 1.7, "learning_rate": 2.41059475999235e-07, "logits/chosen": -2.4017083644866943, "logits/rejected": -2.4412665367126465, "logps/chosen": -251.3889923095703, "logps/rejected": -283.4482727050781, "loss": 0.0791, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6491401791572571, "rewards/margins": 5.978515148162842, "rewards/rejected": -6.627655982971191, "step": 3290 }, { "epoch": 1.7, "learning_rate": 2.4010327022375216e-07, "logits/chosen": -2.5032293796539307, "logits/rejected": -2.48028826713562, "logps/chosen": -304.7440185546875, "logps/rejected": -320.63519287109375, "loss": 0.1398, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.17621102929115295, "rewards/margins": 7.24979305267334, "rewards/rejected": -7.073582649230957, "step": 3300 }, { "epoch": 1.71, "learning_rate": 2.391470644482693e-07, "logits/chosen": -2.4918460845947266, "logits/rejected": -2.436323642730713, "logps/chosen": -289.7032165527344, "logps/rejected": -310.91546630859375, "loss": 0.0748, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.15517649054527283, "rewards/margins": 6.368616104125977, "rewards/rejected": -6.213438987731934, "step": 3310 }, { "epoch": 1.71, "learning_rate": 2.3819085867278636e-07, "logits/chosen": -2.4458746910095215, "logits/rejected": -2.3586153984069824, "logps/chosen": -211.6595916748047, "logps/rejected": -250.60513305664062, "loss": 0.0827, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6259239315986633, "rewards/margins": 5.855485439300537, "rewards/rejected": -6.481408596038818, "step": 3320 }, { "epoch": 1.72, "learning_rate": 2.3723465289730348e-07, "logits/chosen": -2.5415797233581543, "logits/rejected": -2.482503652572632, "logps/chosen": -305.93267822265625, "logps/rejected": -257.7454833984375, "loss": 0.1036, "rewards/accuracies": 0.9375, "rewards/chosen": 0.0975867435336113, "rewards/margins": 5.548628807067871, "rewards/rejected": -5.4510416984558105, "step": 3330 }, { "epoch": 1.72, "learning_rate": 2.362784471218206e-07, "logits/chosen": -2.4980437755584717, "logits/rejected": -2.5161662101745605, "logps/chosen": -262.29510498046875, "logps/rejected": -310.7427978515625, "loss": 0.0731, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.31484660506248474, "rewards/margins": 6.899011135101318, "rewards/rejected": -6.584165096282959, "step": 3340 }, { "epoch": 1.73, "learning_rate": 2.353222413463377e-07, "logits/chosen": -2.462398052215576, "logits/rejected": -2.387648582458496, "logps/chosen": -273.9061279296875, "logps/rejected": -313.47222900390625, "loss": 0.0813, "rewards/accuracies": 0.9375, "rewards/chosen": -0.27307185530662537, "rewards/margins": 6.449624061584473, "rewards/rejected": -6.722695827484131, "step": 3350 }, { "epoch": 1.73, "learning_rate": 2.3436603557085483e-07, "logits/chosen": -2.559948205947876, "logits/rejected": -2.435615062713623, "logps/chosen": -279.50567626953125, "logps/rejected": -314.01318359375, "loss": 0.0912, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.1880934089422226, "rewards/margins": 5.78375768661499, "rewards/rejected": -5.971850395202637, "step": 3360 }, { "epoch": 1.74, "learning_rate": 2.3340982979537197e-07, "logits/chosen": -2.477184772491455, "logits/rejected": -2.506683588027954, "logps/chosen": -345.5298156738281, "logps/rejected": -338.9172668457031, "loss": 0.0733, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.6365272998809814, "rewards/margins": 6.926106929779053, "rewards/rejected": -6.28957986831665, "step": 3370 }, { "epoch": 1.74, "learning_rate": 2.3245362401988909e-07, "logits/chosen": -2.4809818267822266, "logits/rejected": -2.4546568393707275, "logps/chosen": -291.1073303222656, "logps/rejected": -299.65234375, "loss": 0.0924, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.744446873664856, "rewards/margins": 7.913092613220215, "rewards/rejected": -7.168646335601807, "step": 3380 }, { "epoch": 1.75, "learning_rate": 2.314974182444062e-07, "logits/chosen": -2.5422558784484863, "logits/rejected": -2.5349318981170654, "logps/chosen": -265.03131103515625, "logps/rejected": -303.9304504394531, "loss": 0.0583, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.8401824831962585, "rewards/margins": 6.261730194091797, "rewards/rejected": -7.101912498474121, "step": 3390 }, { "epoch": 1.76, "learning_rate": 2.305412124689233e-07, "logits/chosen": -2.3767032623291016, "logits/rejected": -2.358243465423584, "logps/chosen": -271.84942626953125, "logps/rejected": -295.79327392578125, "loss": 0.0739, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.003557852003723383, "rewards/margins": 7.05438232421875, "rewards/rejected": -7.057940483093262, "step": 3400 }, { "epoch": 1.76, "learning_rate": 2.295850066934404e-07, "logits/chosen": -2.4502758979797363, "logits/rejected": -2.3828396797180176, "logps/chosen": -278.19732666015625, "logps/rejected": -309.7081604003906, "loss": 0.0784, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.501421332359314, "rewards/margins": 6.933775424957275, "rewards/rejected": -7.435196876525879, "step": 3410 }, { "epoch": 1.77, "learning_rate": 2.2862880091795752e-07, "logits/chosen": -2.4277431964874268, "logits/rejected": -2.446159839630127, "logps/chosen": -270.41741943359375, "logps/rejected": -327.88714599609375, "loss": 0.0823, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.05128968879580498, "rewards/margins": 7.131208896636963, "rewards/rejected": -7.182497501373291, "step": 3420 }, { "epoch": 1.77, "learning_rate": 2.2767259514247464e-07, "logits/chosen": -2.369462490081787, "logits/rejected": -2.2862589359283447, "logps/chosen": -275.76141357421875, "logps/rejected": -328.746337890625, "loss": 0.0831, "rewards/accuracies": 0.9375, "rewards/chosen": -0.08160386979579926, "rewards/margins": 6.414852142333984, "rewards/rejected": -6.496455192565918, "step": 3430 }, { "epoch": 1.78, "learning_rate": 2.2671638936699178e-07, "logits/chosen": -2.2966933250427246, "logits/rejected": -2.256803512573242, "logps/chosen": -271.08062744140625, "logps/rejected": -306.69134521484375, "loss": 0.0773, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.03467688709497452, "rewards/margins": 6.333457946777344, "rewards/rejected": -6.298781394958496, "step": 3440 }, { "epoch": 1.78, "learning_rate": 2.257601835915089e-07, "logits/chosen": -2.444070339202881, "logits/rejected": -2.4232065677642822, "logps/chosen": -327.8500671386719, "logps/rejected": -316.18670654296875, "loss": 0.0727, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.3836885094642639, "rewards/margins": 7.016473293304443, "rewards/rejected": -6.632785797119141, "step": 3450 }, { "epoch": 1.79, "learning_rate": 2.24803977816026e-07, "logits/chosen": -2.389791488647461, "logits/rejected": -2.4729177951812744, "logps/chosen": -283.513916015625, "logps/rejected": -287.8778991699219, "loss": 0.1155, "rewards/accuracies": 0.9375, "rewards/chosen": 0.20163340866565704, "rewards/margins": 7.153794288635254, "rewards/rejected": -6.952160835266113, "step": 3460 }, { "epoch": 1.79, "learning_rate": 2.2384777204054313e-07, "logits/chosen": -2.516119956970215, "logits/rejected": -2.493978977203369, "logps/chosen": -290.07672119140625, "logps/rejected": -308.48785400390625, "loss": 0.0915, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.6690025329589844, "rewards/margins": 7.590751647949219, "rewards/rejected": -6.921748161315918, "step": 3470 }, { "epoch": 1.8, "learning_rate": 2.2289156626506022e-07, "logits/chosen": -2.3329920768737793, "logits/rejected": -2.2687020301818848, "logps/chosen": -297.0395812988281, "logps/rejected": -300.65667724609375, "loss": 0.0788, "rewards/accuracies": 0.9375, "rewards/chosen": -0.2801963686943054, "rewards/margins": 6.951547145843506, "rewards/rejected": -7.231744289398193, "step": 3480 }, { "epoch": 1.8, "learning_rate": 2.2193536048957733e-07, "logits/chosen": -2.523899555206299, "logits/rejected": -2.528120279312134, "logps/chosen": -317.03179931640625, "logps/rejected": -318.65460205078125, "loss": 0.0846, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.14804738759994507, "rewards/margins": 6.38545560836792, "rewards/rejected": -6.237408638000488, "step": 3490 }, { "epoch": 1.81, "learning_rate": 2.2097915471409445e-07, "logits/chosen": -2.4564592838287354, "logits/rejected": -2.4133288860321045, "logps/chosen": -283.5343933105469, "logps/rejected": -309.0581359863281, "loss": 0.101, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.22065086662769318, "rewards/margins": 6.83633279800415, "rewards/rejected": -7.05698299407959, "step": 3500 }, { "epoch": 1.81, "eval_logits/chosen": -2.527757167816162, "eval_logits/rejected": -2.5068888664245605, "eval_logps/chosen": -295.24420166015625, "eval_logps/rejected": -274.2143859863281, "eval_loss": 0.5783348679542542, "eval_rewards/accuracies": 0.7680000066757202, "eval_rewards/chosen": -1.9520740509033203, "eval_rewards/margins": 2.2332372665405273, "eval_rewards/rejected": -4.185311317443848, "eval_runtime": 452.5009, "eval_samples_per_second": 4.42, "eval_steps_per_second": 0.276, "step": 3500 }, { "epoch": 1.81, "learning_rate": 2.200229489386116e-07, "logits/chosen": -2.519521474838257, "logits/rejected": -2.472960948944092, "logps/chosen": -282.6535949707031, "logps/rejected": -330.7150573730469, "loss": 0.0975, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.10534670203924179, "rewards/margins": 7.252067565917969, "rewards/rejected": -7.357414245605469, "step": 3510 }, { "epoch": 1.82, "learning_rate": 2.190667431631287e-07, "logits/chosen": -2.394043445587158, "logits/rejected": -2.4325778484344482, "logps/chosen": -275.0331726074219, "logps/rejected": -367.2027893066406, "loss": 0.1485, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1844065934419632, "rewards/margins": 7.9727044105529785, "rewards/rejected": -7.788296699523926, "step": 3520 }, { "epoch": 1.82, "learning_rate": 2.1811053738764582e-07, "logits/chosen": -2.448549509048462, "logits/rejected": -2.4569427967071533, "logps/chosen": -246.14657592773438, "logps/rejected": -308.890869140625, "loss": 0.0624, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.19459474086761475, "rewards/margins": 6.0366058349609375, "rewards/rejected": -6.231200695037842, "step": 3530 }, { "epoch": 1.83, "learning_rate": 2.1715433161216294e-07, "logits/chosen": -2.4541616439819336, "logits/rejected": -2.3277955055236816, "logps/chosen": -274.67626953125, "logps/rejected": -317.1044921875, "loss": 0.0751, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.32509785890579224, "rewards/margins": 6.280895233154297, "rewards/rejected": -6.605993747711182, "step": 3540 }, { "epoch": 1.83, "learning_rate": 2.1619812583668005e-07, "logits/chosen": -2.5092625617980957, "logits/rejected": -2.4619314670562744, "logps/chosen": -258.93194580078125, "logps/rejected": -278.76763916015625, "loss": 0.1008, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.588664710521698, "rewards/margins": 6.409360408782959, "rewards/rejected": -6.998025417327881, "step": 3550 }, { "epoch": 1.84, "learning_rate": 2.1524192006119714e-07, "logits/chosen": -2.406841516494751, "logits/rejected": -2.4362905025482178, "logps/chosen": -252.5372772216797, "logps/rejected": -290.995361328125, "loss": 0.0803, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6738952994346619, "rewards/margins": 6.143965721130371, "rewards/rejected": -6.817861080169678, "step": 3560 }, { "epoch": 1.84, "learning_rate": 2.1428571428571426e-07, "logits/chosen": -2.506378412246704, "logits/rejected": -2.50364351272583, "logps/chosen": -253.6861114501953, "logps/rejected": -268.87969970703125, "loss": 0.0661, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.7515280246734619, "rewards/margins": 5.812872886657715, "rewards/rejected": -6.564399719238281, "step": 3570 }, { "epoch": 1.85, "learning_rate": 2.133295085102314e-07, "logits/chosen": -2.3749794960021973, "logits/rejected": -2.441770076751709, "logps/chosen": -265.58209228515625, "logps/rejected": -287.2449645996094, "loss": 0.0564, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.5784146189689636, "rewards/margins": 6.812424659729004, "rewards/rejected": -7.390838623046875, "step": 3580 }, { "epoch": 1.85, "learning_rate": 2.1237330273474851e-07, "logits/chosen": -2.5062410831451416, "logits/rejected": -2.5346713066101074, "logps/chosen": -313.18817138671875, "logps/rejected": -302.63458251953125, "loss": 0.1101, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.45042672753334045, "rewards/margins": 6.4521989822387695, "rewards/rejected": -6.902626037597656, "step": 3590 }, { "epoch": 1.86, "learning_rate": 2.1141709695926563e-07, "logits/chosen": -2.4420742988586426, "logits/rejected": -2.443021059036255, "logps/chosen": -278.341064453125, "logps/rejected": -274.72418212890625, "loss": 0.0752, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.20474250614643097, "rewards/margins": 6.852424621582031, "rewards/rejected": -7.057167053222656, "step": 3600 }, { "epoch": 1.86, "learning_rate": 2.1046089118378275e-07, "logits/chosen": -2.5255866050720215, "logits/rejected": -2.548825740814209, "logps/chosen": -258.55224609375, "logps/rejected": -315.3794250488281, "loss": 0.0795, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.010959947481751442, "rewards/margins": 7.67264461517334, "rewards/rejected": -7.683606147766113, "step": 3610 }, { "epoch": 1.87, "learning_rate": 2.0950468540829986e-07, "logits/chosen": -2.4413561820983887, "logits/rejected": -2.4037675857543945, "logps/chosen": -281.16693115234375, "logps/rejected": -287.8060302734375, "loss": 0.1006, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.09261156618595123, "rewards/margins": 6.9859113693237305, "rewards/rejected": -6.8933000564575195, "step": 3620 }, { "epoch": 1.87, "learning_rate": 2.0854847963281698e-07, "logits/chosen": -2.480898380279541, "logits/rejected": -2.3625235557556152, "logps/chosen": -312.2430419921875, "logps/rejected": -297.37713623046875, "loss": 0.0805, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.038004614412784576, "rewards/margins": 6.296678066253662, "rewards/rejected": -6.258673667907715, "step": 3630 }, { "epoch": 1.88, "learning_rate": 2.0759227385733407e-07, "logits/chosen": -2.3831233978271484, "logits/rejected": -2.4543986320495605, "logps/chosen": -319.70159912109375, "logps/rejected": -310.35638427734375, "loss": 0.0865, "rewards/accuracies": 0.9375, "rewards/chosen": -0.2939375936985016, "rewards/margins": 6.249878883361816, "rewards/rejected": -6.543816566467285, "step": 3640 }, { "epoch": 1.88, "learning_rate": 2.066360680818512e-07, "logits/chosen": -2.4780735969543457, "logits/rejected": -2.4746382236480713, "logps/chosen": -336.36346435546875, "logps/rejected": -311.2398986816406, "loss": 0.0663, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3942198157310486, "rewards/margins": 6.873496055603027, "rewards/rejected": -6.47927713394165, "step": 3650 }, { "epoch": 1.89, "learning_rate": 2.0567986230636832e-07, "logits/chosen": -2.5300562381744385, "logits/rejected": -2.520646572113037, "logps/chosen": -259.832763671875, "logps/rejected": -299.1865539550781, "loss": 0.1066, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.015356218442320824, "rewards/margins": 6.783368110656738, "rewards/rejected": -6.7987236976623535, "step": 3660 }, { "epoch": 1.89, "learning_rate": 2.0472365653088544e-07, "logits/chosen": -2.542448043823242, "logits/rejected": -2.494253635406494, "logps/chosen": -304.13446044921875, "logps/rejected": -295.2920837402344, "loss": 0.1075, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.3823709487915039, "rewards/margins": 5.561723232269287, "rewards/rejected": -5.944094657897949, "step": 3670 }, { "epoch": 1.9, "learning_rate": 2.0376745075540256e-07, "logits/chosen": -2.5494580268859863, "logits/rejected": -2.544642925262451, "logps/chosen": -327.9197998046875, "logps/rejected": -315.2252197265625, "loss": 0.0836, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.03714234381914139, "rewards/margins": 6.751425266265869, "rewards/rejected": -6.788567543029785, "step": 3680 }, { "epoch": 1.91, "learning_rate": 2.0281124497991967e-07, "logits/chosen": -2.564119577407837, "logits/rejected": -2.4600670337677, "logps/chosen": -285.39801025390625, "logps/rejected": -321.54742431640625, "loss": 0.0885, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.11016042530536652, "rewards/margins": 6.66648006439209, "rewards/rejected": -6.556318759918213, "step": 3690 }, { "epoch": 1.91, "learning_rate": 2.018550392044368e-07, "logits/chosen": -2.59401798248291, "logits/rejected": -2.5557501316070557, "logps/chosen": -276.79486083984375, "logps/rejected": -344.27130126953125, "loss": 0.0822, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.26534900069236755, "rewards/margins": 7.319410800933838, "rewards/rejected": -7.054062843322754, "step": 3700 }, { "epoch": 1.92, "learning_rate": 2.0089883342895388e-07, "logits/chosen": -2.6014533042907715, "logits/rejected": -2.5620899200439453, "logps/chosen": -318.00531005859375, "logps/rejected": -271.3183288574219, "loss": 0.0996, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26080864667892456, "rewards/margins": 6.475592136383057, "rewards/rejected": -6.214783668518066, "step": 3710 }, { "epoch": 1.92, "learning_rate": 1.9994262765347102e-07, "logits/chosen": -2.523375988006592, "logits/rejected": -2.512540340423584, "logps/chosen": -293.73431396484375, "logps/rejected": -266.2129821777344, "loss": 0.0836, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.26058363914489746, "rewards/margins": 6.056021690368652, "rewards/rejected": -6.316605091094971, "step": 3720 }, { "epoch": 1.93, "learning_rate": 1.9898642187798813e-07, "logits/chosen": -2.490314245223999, "logits/rejected": -2.4401144981384277, "logps/chosen": -289.86444091796875, "logps/rejected": -321.94183349609375, "loss": 0.0654, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.539985716342926, "rewards/margins": 7.538631439208984, "rewards/rejected": -6.9986467361450195, "step": 3730 }, { "epoch": 1.93, "learning_rate": 1.9803021610250525e-07, "logits/chosen": -2.5355782508850098, "logits/rejected": -2.546966791152954, "logps/chosen": -235.0393829345703, "logps/rejected": -285.04840087890625, "loss": 0.0731, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.02458820305764675, "rewards/margins": 6.333740234375, "rewards/rejected": -6.358328342437744, "step": 3740 }, { "epoch": 1.94, "learning_rate": 1.9707401032702237e-07, "logits/chosen": -2.481501579284668, "logits/rejected": -2.4209208488464355, "logps/chosen": -266.3040771484375, "logps/rejected": -290.20208740234375, "loss": 0.0538, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.03531923145055771, "rewards/margins": 6.58829402923584, "rewards/rejected": -6.552975654602051, "step": 3750 }, { "epoch": 1.94, "learning_rate": 1.9611780455153948e-07, "logits/chosen": -2.583940267562866, "logits/rejected": -2.5408358573913574, "logps/chosen": -239.2235870361328, "logps/rejected": -294.5646057128906, "loss": 0.1252, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.46535977721214294, "rewards/margins": 6.524919033050537, "rewards/rejected": -6.059558868408203, "step": 3760 }, { "epoch": 1.95, "learning_rate": 1.951615987760566e-07, "logits/chosen": -2.467961072921753, "logits/rejected": -2.4821152687072754, "logps/chosen": -277.696533203125, "logps/rejected": -301.1275939941406, "loss": 0.0803, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.28950750827789307, "rewards/margins": 6.481263637542725, "rewards/rejected": -6.7707719802856445, "step": 3770 }, { "epoch": 1.95, "learning_rate": 1.942053930005737e-07, "logits/chosen": -2.5769028663635254, "logits/rejected": -2.512784004211426, "logps/chosen": -287.0464782714844, "logps/rejected": -302.48370361328125, "loss": 0.0578, "rewards/accuracies": 1.0, "rewards/chosen": 0.1127987951040268, "rewards/margins": 7.0196099281311035, "rewards/rejected": -6.906810760498047, "step": 3780 }, { "epoch": 1.96, "learning_rate": 1.9324918722509086e-07, "logits/chosen": -2.4373650550842285, "logits/rejected": -2.4223923683166504, "logps/chosen": -271.5222473144531, "logps/rejected": -329.9495544433594, "loss": 0.0852, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.07137300074100494, "rewards/margins": 6.933795928955078, "rewards/rejected": -7.005169868469238, "step": 3790 }, { "epoch": 1.96, "learning_rate": 1.9229298144960794e-07, "logits/chosen": -2.4774272441864014, "logits/rejected": -2.485574722290039, "logps/chosen": -289.32550048828125, "logps/rejected": -297.17132568359375, "loss": 0.1129, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.4329603612422943, "rewards/margins": 6.4090399742126465, "rewards/rejected": -6.8420000076293945, "step": 3800 }, { "epoch": 1.97, "learning_rate": 1.9133677567412506e-07, "logits/chosen": -2.480685234069824, "logits/rejected": -2.485748767852783, "logps/chosen": -285.3147277832031, "logps/rejected": -300.1988830566406, "loss": 0.0892, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2884216904640198, "rewards/margins": 6.330131530761719, "rewards/rejected": -6.618553161621094, "step": 3810 }, { "epoch": 1.97, "learning_rate": 1.9038056989864218e-07, "logits/chosen": -2.5223355293273926, "logits/rejected": -2.4684529304504395, "logps/chosen": -249.61636352539062, "logps/rejected": -274.7422790527344, "loss": 0.1282, "rewards/accuracies": 0.9375, "rewards/chosen": 0.3523308336734772, "rewards/margins": 5.8032026290893555, "rewards/rejected": -5.450871467590332, "step": 3820 }, { "epoch": 1.98, "learning_rate": 1.894243641231593e-07, "logits/chosen": -2.3684513568878174, "logits/rejected": -2.2880539894104004, "logps/chosen": -269.7086486816406, "logps/rejected": -254.0093231201172, "loss": 0.0998, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.3140656352043152, "rewards/margins": 5.751679420471191, "rewards/rejected": -6.065744400024414, "step": 3830 }, { "epoch": 1.98, "learning_rate": 1.884681583476764e-07, "logits/chosen": -2.474822521209717, "logits/rejected": -2.5617165565490723, "logps/chosen": -290.51214599609375, "logps/rejected": -301.39251708984375, "loss": 0.0547, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7417269945144653, "rewards/margins": 5.65903902053833, "rewards/rejected": -6.400765895843506, "step": 3840 }, { "epoch": 1.99, "learning_rate": 1.8751195257219352e-07, "logits/chosen": -2.536726474761963, "logits/rejected": -2.5045382976531982, "logps/chosen": -254.51803588867188, "logps/rejected": -287.4189758300781, "loss": 0.1006, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.414132297039032, "rewards/margins": 5.95471715927124, "rewards/rejected": -6.368849754333496, "step": 3850 }, { "epoch": 1.99, "learning_rate": 1.8655574679671067e-07, "logits/chosen": -2.4787344932556152, "logits/rejected": -2.4844470024108887, "logps/chosen": -293.6579895019531, "logps/rejected": -310.86761474609375, "loss": 0.0978, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.47846388816833496, "rewards/margins": 6.510424613952637, "rewards/rejected": -6.988888740539551, "step": 3860 }, { "epoch": 2.0, "learning_rate": 1.8559954102122778e-07, "logits/chosen": -2.479588508605957, "logits/rejected": -2.514061450958252, "logps/chosen": -280.0299072265625, "logps/rejected": -297.7696533203125, "loss": 0.1121, "rewards/accuracies": 0.9375, "rewards/chosen": -0.11727344989776611, "rewards/margins": 6.20565128326416, "rewards/rejected": -6.3229241371154785, "step": 3870 }, { "epoch": 2.0, "learning_rate": 1.8464333524574487e-07, "logits/chosen": -2.4771392345428467, "logits/rejected": -2.4389753341674805, "logps/chosen": -256.0650634765625, "logps/rejected": -277.08477783203125, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/chosen": 0.06204669550061226, "rewards/margins": 6.886547088623047, "rewards/rejected": -6.82450008392334, "step": 3880 }, { "epoch": 2.01, "learning_rate": 1.8368712947026199e-07, "logits/chosen": -2.4898078441619873, "logits/rejected": -2.522775173187256, "logps/chosen": -287.1253967285156, "logps/rejected": -301.94256591796875, "loss": 0.0156, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.26958638429641724, "rewards/margins": 7.4097700119018555, "rewards/rejected": -7.140183448791504, "step": 3890 }, { "epoch": 2.01, "learning_rate": 1.827309236947791e-07, "logits/chosen": -2.4328882694244385, "logits/rejected": -2.46454119682312, "logps/chosen": -252.57009887695312, "logps/rejected": -318.90521240234375, "loss": 0.0219, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.20105068385601044, "rewards/margins": 8.330595016479492, "rewards/rejected": -8.129544258117676, "step": 3900 }, { "epoch": 2.02, "learning_rate": 1.8177471791929622e-07, "logits/chosen": -2.362769603729248, "logits/rejected": -2.421079397201538, "logps/chosen": -266.7387390136719, "logps/rejected": -366.26568603515625, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": 0.3538321852684021, "rewards/margins": 8.694085121154785, "rewards/rejected": -8.340251922607422, "step": 3910 }, { "epoch": 2.02, "learning_rate": 1.8081851214381333e-07, "logits/chosen": -2.3552629947662354, "logits/rejected": -2.3446311950683594, "logps/chosen": -284.10064697265625, "logps/rejected": -355.9640197753906, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": 0.04302588850259781, "rewards/margins": 8.178693771362305, "rewards/rejected": -8.13566780090332, "step": 3920 }, { "epoch": 2.03, "learning_rate": 1.7986230636833047e-07, "logits/chosen": -2.4111831188201904, "logits/rejected": -2.5026137828826904, "logps/chosen": -228.980712890625, "logps/rejected": -296.83905029296875, "loss": 0.0186, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2731500267982483, "rewards/margins": 7.313241481781006, "rewards/rejected": -7.586390495300293, "step": 3930 }, { "epoch": 2.03, "learning_rate": 1.789061005928476e-07, "logits/chosen": -2.5294995307922363, "logits/rejected": -2.470599889755249, "logps/chosen": -309.84344482421875, "logps/rejected": -292.3616943359375, "loss": 0.0254, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.040870584547519684, "rewards/margins": 7.898612022399902, "rewards/rejected": -7.8577423095703125, "step": 3940 }, { "epoch": 2.04, "learning_rate": 1.7794989481736468e-07, "logits/chosen": -2.4372847080230713, "logits/rejected": -2.4185192584991455, "logps/chosen": -296.27508544921875, "logps/rejected": -320.5166931152344, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -0.05343180149793625, "rewards/margins": 8.25248908996582, "rewards/rejected": -8.305920600891113, "step": 3950 }, { "epoch": 2.04, "learning_rate": 1.769936890418818e-07, "logits/chosen": -2.4072155952453613, "logits/rejected": -2.3540451526641846, "logps/chosen": -273.29339599609375, "logps/rejected": -315.81890869140625, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": 0.36594390869140625, "rewards/margins": 8.549717903137207, "rewards/rejected": -8.183772087097168, "step": 3960 }, { "epoch": 2.05, "learning_rate": 1.760374832663989e-07, "logits/chosen": -2.407019853591919, "logits/rejected": -2.346592664718628, "logps/chosen": -274.61138916015625, "logps/rejected": -335.91253662109375, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -0.06718329340219498, "rewards/margins": 8.160886764526367, "rewards/rejected": -8.228068351745605, "step": 3970 }, { "epoch": 2.05, "learning_rate": 1.7508127749091603e-07, "logits/chosen": -2.4587526321411133, "logits/rejected": -2.431252956390381, "logps/chosen": -281.40447998046875, "logps/rejected": -324.2808837890625, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.30483338236808777, "rewards/margins": 8.109731674194336, "rewards/rejected": -8.414565086364746, "step": 3980 }, { "epoch": 2.06, "learning_rate": 1.7412507171543314e-07, "logits/chosen": -2.4788222312927246, "logits/rejected": -2.4190168380737305, "logps/chosen": -273.2280578613281, "logps/rejected": -290.38812255859375, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -0.5941675305366516, "rewards/margins": 8.313642501831055, "rewards/rejected": -8.907809257507324, "step": 3990 }, { "epoch": 2.07, "learning_rate": 1.7316886593995028e-07, "logits/chosen": -2.524629831314087, "logits/rejected": -2.4027457237243652, "logps/chosen": -255.3746795654297, "logps/rejected": -309.2898254394531, "loss": 0.0195, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.27210792899131775, "rewards/margins": 8.531095504760742, "rewards/rejected": -8.803202629089355, "step": 4000 }, { "epoch": 2.07, "eval_logits/chosen": -2.5158400535583496, "eval_logits/rejected": -2.493459463119507, "eval_logps/chosen": -305.0455017089844, "eval_logps/rejected": -289.9938049316406, "eval_loss": 0.625259518623352, "eval_rewards/accuracies": 0.7599999904632568, "eval_rewards/chosen": -2.932209014892578, "eval_rewards/margins": 2.831045150756836, "eval_rewards/rejected": -5.763253688812256, "eval_runtime": 453.5801, "eval_samples_per_second": 4.409, "eval_steps_per_second": 0.276, "step": 4000 }, { "epoch": 2.07, "learning_rate": 1.722126601644674e-07, "logits/chosen": -2.39493989944458, "logits/rejected": -2.40425705909729, "logps/chosen": -286.09185791015625, "logps/rejected": -304.4429016113281, "loss": 0.0108, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.2783908247947693, "rewards/margins": 7.916007041931152, "rewards/rejected": -8.194398880004883, "step": 4010 }, { "epoch": 2.08, "learning_rate": 1.7125645438898452e-07, "logits/chosen": -2.4972126483917236, "logits/rejected": -2.3945891857147217, "logps/chosen": -275.5196228027344, "logps/rejected": -324.5783996582031, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.20885951817035675, "rewards/margins": 9.706292152404785, "rewards/rejected": -9.915151596069336, "step": 4020 }, { "epoch": 2.08, "learning_rate": 1.703002486135016e-07, "logits/chosen": -2.485563278198242, "logits/rejected": -2.3387653827667236, "logps/chosen": -294.34124755859375, "logps/rejected": -329.8710021972656, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": 0.1807212084531784, "rewards/margins": 9.752906799316406, "rewards/rejected": -9.572185516357422, "step": 4030 }, { "epoch": 2.09, "learning_rate": 1.6934404283801872e-07, "logits/chosen": -2.4767284393310547, "logits/rejected": -2.2920525074005127, "logps/chosen": -288.2818908691406, "logps/rejected": -346.46185302734375, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.2743307054042816, "rewards/margins": 9.252851486206055, "rewards/rejected": -9.527181625366211, "step": 4040 }, { "epoch": 2.09, "learning_rate": 1.6838783706253584e-07, "logits/chosen": -2.425131320953369, "logits/rejected": -2.5031018257141113, "logps/chosen": -253.77273559570312, "logps/rejected": -301.98980712890625, "loss": 0.0374, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.680608332157135, "rewards/margins": 8.351430892944336, "rewards/rejected": -9.032038688659668, "step": 4050 }, { "epoch": 2.1, "learning_rate": 1.6743163128705295e-07, "logits/chosen": -2.4143614768981934, "logits/rejected": -2.4409687519073486, "logps/chosen": -264.8473815917969, "logps/rejected": -324.42376708984375, "loss": 0.0124, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.908231258392334, "rewards/margins": 8.655680656433105, "rewards/rejected": -9.563910484313965, "step": 4060 }, { "epoch": 2.1, "learning_rate": 1.664754255115701e-07, "logits/chosen": -2.4121220111846924, "logits/rejected": -2.3599774837493896, "logps/chosen": -262.0404052734375, "logps/rejected": -341.3482666015625, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -0.46254047751426697, "rewards/margins": 9.965926170349121, "rewards/rejected": -10.428467750549316, "step": 4070 }, { "epoch": 2.11, "learning_rate": 1.655192197360872e-07, "logits/chosen": -2.550471305847168, "logits/rejected": -2.515150547027588, "logps/chosen": -318.02349853515625, "logps/rejected": -362.1571960449219, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -0.4734498858451843, "rewards/margins": 9.279683113098145, "rewards/rejected": -9.753132820129395, "step": 4080 }, { "epoch": 2.11, "learning_rate": 1.6456301396060433e-07, "logits/chosen": -2.474956750869751, "logits/rejected": -2.4845316410064697, "logps/chosen": -312.4612121582031, "logps/rejected": -371.6642150878906, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.6691607236862183, "rewards/margins": 8.871939659118652, "rewards/rejected": -9.541101455688477, "step": 4090 }, { "epoch": 2.12, "learning_rate": 1.6360680818512144e-07, "logits/chosen": -2.4650988578796387, "logits/rejected": -2.3654093742370605, "logps/chosen": -273.10906982421875, "logps/rejected": -328.1131591796875, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.6473243832588196, "rewards/margins": 8.533228874206543, "rewards/rejected": -9.18055248260498, "step": 4100 }, { "epoch": 2.12, "learning_rate": 1.6265060240963853e-07, "logits/chosen": -2.4874815940856934, "logits/rejected": -2.346606969833374, "logps/chosen": -313.10040283203125, "logps/rejected": -328.90081787109375, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -0.4576417803764343, "rewards/margins": 8.776191711425781, "rewards/rejected": -9.233833312988281, "step": 4110 }, { "epoch": 2.13, "learning_rate": 1.6169439663415565e-07, "logits/chosen": -2.5000967979431152, "logits/rejected": -2.4653024673461914, "logps/chosen": -291.36895751953125, "logps/rejected": -340.0940246582031, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -0.645675539970398, "rewards/margins": 10.322629928588867, "rewards/rejected": -10.968305587768555, "step": 4120 }, { "epoch": 2.13, "learning_rate": 1.6073819085867276e-07, "logits/chosen": -2.5758209228515625, "logits/rejected": -2.4631595611572266, "logps/chosen": -273.5103759765625, "logps/rejected": -392.3283386230469, "loss": 0.0123, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.28731220960617065, "rewards/margins": 9.878499984741211, "rewards/rejected": -10.165812492370605, "step": 4130 }, { "epoch": 2.14, "learning_rate": 1.597819850831899e-07, "logits/chosen": -2.4753005504608154, "logits/rejected": -2.4263906478881836, "logps/chosen": -257.69207763671875, "logps/rejected": -337.54058837890625, "loss": 0.0285, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.8755598068237305, "rewards/margins": 9.041923522949219, "rewards/rejected": -9.917482376098633, "step": 4140 }, { "epoch": 2.14, "learning_rate": 1.5882577930770702e-07, "logits/chosen": -2.386593818664551, "logits/rejected": -2.465447187423706, "logps/chosen": -290.82623291015625, "logps/rejected": -332.8897399902344, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.8657897710800171, "rewards/margins": 9.042566299438477, "rewards/rejected": -9.908356666564941, "step": 4150 }, { "epoch": 2.15, "learning_rate": 1.5786957353222414e-07, "logits/chosen": -2.412400245666504, "logits/rejected": -2.4159159660339355, "logps/chosen": -333.2178649902344, "logps/rejected": -335.25787353515625, "loss": 0.0111, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.09050460904836655, "rewards/margins": 9.707788467407227, "rewards/rejected": -9.798294067382812, "step": 4160 }, { "epoch": 2.15, "learning_rate": 1.5691336775674125e-07, "logits/chosen": -2.3777973651885986, "logits/rejected": -2.3793344497680664, "logps/chosen": -267.68084716796875, "logps/rejected": -323.4200744628906, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -0.5352864861488342, "rewards/margins": 10.006217956542969, "rewards/rejected": -10.541502952575684, "step": 4170 }, { "epoch": 2.16, "learning_rate": 1.5595716198125837e-07, "logits/chosen": -2.379467725753784, "logits/rejected": -2.499807834625244, "logps/chosen": -306.67608642578125, "logps/rejected": -353.30084228515625, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.9916990995407104, "rewards/margins": 9.460695266723633, "rewards/rejected": -10.452393531799316, "step": 4180 }, { "epoch": 2.16, "learning_rate": 1.5500095620577546e-07, "logits/chosen": -2.4940500259399414, "logits/rejected": -2.462193489074707, "logps/chosen": -270.82696533203125, "logps/rejected": -294.65570068359375, "loss": 0.0114, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.4269378185272217, "rewards/margins": 8.49191665649414, "rewards/rejected": -9.918853759765625, "step": 4190 }, { "epoch": 2.17, "learning_rate": 1.5404475043029257e-07, "logits/chosen": -2.4286954402923584, "logits/rejected": -2.367332935333252, "logps/chosen": -269.7999267578125, "logps/rejected": -323.0738525390625, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -1.022783875465393, "rewards/margins": 9.329833984375, "rewards/rejected": -10.352617263793945, "step": 4200 }, { "epoch": 2.17, "learning_rate": 1.5308854465480971e-07, "logits/chosen": -2.498307704925537, "logits/rejected": -2.450392961502075, "logps/chosen": -280.20977783203125, "logps/rejected": -296.2657775878906, "loss": 0.0137, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.5457987785339355, "rewards/margins": 8.372919082641602, "rewards/rejected": -8.918716430664062, "step": 4210 }, { "epoch": 2.18, "learning_rate": 1.5213233887932683e-07, "logits/chosen": -2.4233896732330322, "logits/rejected": -2.4217326641082764, "logps/chosen": -313.09271240234375, "logps/rejected": -329.91314697265625, "loss": 0.0078, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.6310591697692871, "rewards/margins": 8.810084342956543, "rewards/rejected": -9.441144943237305, "step": 4220 }, { "epoch": 2.18, "learning_rate": 1.5117613310384395e-07, "logits/chosen": -2.441246747970581, "logits/rejected": -2.4308252334594727, "logps/chosen": -294.64312744140625, "logps/rejected": -339.2587585449219, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -0.9671104550361633, "rewards/margins": 10.101009368896484, "rewards/rejected": -11.068120956420898, "step": 4230 }, { "epoch": 2.19, "learning_rate": 1.5021992732836106e-07, "logits/chosen": -2.458059787750244, "logits/rejected": -2.476111650466919, "logps/chosen": -291.772705078125, "logps/rejected": -376.759033203125, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -1.0088956356048584, "rewards/margins": 9.92273998260498, "rewards/rejected": -10.93163776397705, "step": 4240 }, { "epoch": 2.19, "learning_rate": 1.4926372155287818e-07, "logits/chosen": -2.49906587600708, "logits/rejected": -2.332627773284912, "logps/chosen": -237.60049438476562, "logps/rejected": -333.3525390625, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -1.078620433807373, "rewards/margins": 9.37675666809082, "rewards/rejected": -10.455377578735352, "step": 4250 }, { "epoch": 2.2, "learning_rate": 1.483075157773953e-07, "logits/chosen": -2.4600281715393066, "logits/rejected": -2.4805192947387695, "logps/chosen": -290.1564636230469, "logps/rejected": -313.1746826171875, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.712319552898407, "rewards/margins": 8.697793006896973, "rewards/rejected": -9.410112380981445, "step": 4260 }, { "epoch": 2.2, "learning_rate": 1.4735131000191238e-07, "logits/chosen": -2.4032301902770996, "logits/rejected": -2.3650975227355957, "logps/chosen": -251.8123016357422, "logps/rejected": -277.9178771972656, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.7331027984619141, "rewards/margins": 8.663496971130371, "rewards/rejected": -9.396598815917969, "step": 4270 }, { "epoch": 2.21, "learning_rate": 1.4639510422642952e-07, "logits/chosen": -2.410510540008545, "logits/rejected": -2.3593618869781494, "logps/chosen": -254.2480010986328, "logps/rejected": -306.0626525878906, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -1.2594799995422363, "rewards/margins": 7.9070024490356445, "rewards/rejected": -9.166482925415039, "step": 4280 }, { "epoch": 2.21, "learning_rate": 1.4543889845094664e-07, "logits/chosen": -2.4375698566436768, "logits/rejected": -2.336656093597412, "logps/chosen": -318.3421936035156, "logps/rejected": -384.72772216796875, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -0.4227966368198395, "rewards/margins": 10.83735466003418, "rewards/rejected": -11.260150909423828, "step": 4290 }, { "epoch": 2.22, "learning_rate": 1.4448269267546376e-07, "logits/chosen": -2.393700122833252, "logits/rejected": -2.4179205894470215, "logps/chosen": -343.4000244140625, "logps/rejected": -361.1111755371094, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -0.06643708050251007, "rewards/margins": 10.324440002441406, "rewards/rejected": -10.390877723693848, "step": 4300 }, { "epoch": 2.23, "learning_rate": 1.4352648689998087e-07, "logits/chosen": -2.451838731765747, "logits/rejected": -2.507495880126953, "logps/chosen": -259.57757568359375, "logps/rejected": -311.0859375, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.5939081311225891, "rewards/margins": 9.141595840454102, "rewards/rejected": -9.735504150390625, "step": 4310 }, { "epoch": 2.23, "learning_rate": 1.42570281124498e-07, "logits/chosen": -2.3880109786987305, "logits/rejected": -2.4511330127716064, "logps/chosen": -312.5009765625, "logps/rejected": -357.44384765625, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.6199862360954285, "rewards/margins": 9.396383285522461, "rewards/rejected": -10.01636791229248, "step": 4320 }, { "epoch": 2.24, "learning_rate": 1.416140753490151e-07, "logits/chosen": -2.399909734725952, "logits/rejected": -2.409482955932617, "logps/chosen": -312.19403076171875, "logps/rejected": -365.5652160644531, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.7507609128952026, "rewards/margins": 10.391949653625488, "rewards/rejected": -11.142709732055664, "step": 4330 }, { "epoch": 2.24, "learning_rate": 1.4065786957353222e-07, "logits/chosen": -2.466676712036133, "logits/rejected": -2.3715767860412598, "logps/chosen": -290.2066955566406, "logps/rejected": -340.7693176269531, "loss": 0.019, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.8540838956832886, "rewards/margins": 9.24998950958252, "rewards/rejected": -10.104074478149414, "step": 4340 }, { "epoch": 2.25, "learning_rate": 1.3970166379804933e-07, "logits/chosen": -2.4747912883758545, "logits/rejected": -2.4412856101989746, "logps/chosen": -332.2046203613281, "logps/rejected": -336.59320068359375, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -1.2478363513946533, "rewards/margins": 9.040945053100586, "rewards/rejected": -10.28878116607666, "step": 4350 }, { "epoch": 2.25, "learning_rate": 1.3874545802256645e-07, "logits/chosen": -2.4615142345428467, "logits/rejected": -2.4157400131225586, "logps/chosen": -275.7891845703125, "logps/rejected": -314.43426513671875, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -0.8941132426261902, "rewards/margins": 9.226968765258789, "rewards/rejected": -10.12108039855957, "step": 4360 }, { "epoch": 2.26, "learning_rate": 1.3778925224708357e-07, "logits/chosen": -2.3651604652404785, "logits/rejected": -2.3336400985717773, "logps/chosen": -268.6673889160156, "logps/rejected": -325.3690490722656, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.1127688884735107, "rewards/margins": 8.900229454040527, "rewards/rejected": -10.012998580932617, "step": 4370 }, { "epoch": 2.26, "learning_rate": 1.3683304647160068e-07, "logits/chosen": -2.4090378284454346, "logits/rejected": -2.420701265335083, "logps/chosen": -261.0252380371094, "logps/rejected": -343.9711608886719, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.9711012840270996, "rewards/margins": 9.500223159790039, "rewards/rejected": -10.471324920654297, "step": 4380 }, { "epoch": 2.27, "learning_rate": 1.358768406961178e-07, "logits/chosen": -2.3843369483947754, "logits/rejected": -2.299290895462036, "logps/chosen": -255.8084259033203, "logps/rejected": -338.96917724609375, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -1.0958081483840942, "rewards/margins": 10.103997230529785, "rewards/rejected": -11.199804306030273, "step": 4390 }, { "epoch": 2.27, "learning_rate": 1.349206349206349e-07, "logits/chosen": -2.430680990219116, "logits/rejected": -2.4080395698547363, "logps/chosen": -250.6897735595703, "logps/rejected": -306.9358215332031, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -1.9635181427001953, "rewards/margins": 8.765409469604492, "rewards/rejected": -10.728927612304688, "step": 4400 }, { "epoch": 2.28, "learning_rate": 1.3396442914515203e-07, "logits/chosen": -2.3354380130767822, "logits/rejected": -2.336620807647705, "logps/chosen": -297.7576904296875, "logps/rejected": -355.9744873046875, "loss": 0.0105, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.7647383213043213, "rewards/margins": 9.611452102661133, "rewards/rejected": -11.376190185546875, "step": 4410 }, { "epoch": 2.28, "learning_rate": 1.3300822336966917e-07, "logits/chosen": -2.4746663570404053, "logits/rejected": -2.2819876670837402, "logps/chosen": -323.2002258300781, "logps/rejected": -348.868896484375, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -0.9656437039375305, "rewards/margins": 10.08340835571289, "rewards/rejected": -11.049051284790039, "step": 4420 }, { "epoch": 2.29, "learning_rate": 1.3205201759418626e-07, "logits/chosen": -2.3873531818389893, "logits/rejected": -2.2998034954071045, "logps/chosen": -319.4635925292969, "logps/rejected": -344.7845153808594, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -0.3506031632423401, "rewards/margins": 9.482653617858887, "rewards/rejected": -9.833256721496582, "step": 4430 }, { "epoch": 2.29, "learning_rate": 1.3109581181870338e-07, "logits/chosen": -2.4441051483154297, "logits/rejected": -2.4536311626434326, "logps/chosen": -329.62103271484375, "logps/rejected": -332.23748779296875, "loss": 0.0105, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.5083192586898804, "rewards/margins": 9.463578224182129, "rewards/rejected": -9.971896171569824, "step": 4440 }, { "epoch": 2.3, "learning_rate": 1.301396060432205e-07, "logits/chosen": -2.4361507892608643, "logits/rejected": -2.4902892112731934, "logps/chosen": -320.44659423828125, "logps/rejected": -342.33905029296875, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -1.1917698383331299, "rewards/margins": 9.730718612670898, "rewards/rejected": -10.922487258911133, "step": 4450 }, { "epoch": 2.3, "learning_rate": 1.291834002677376e-07, "logits/chosen": -2.414278745651245, "logits/rejected": -2.304199695587158, "logps/chosen": -286.6895751953125, "logps/rejected": -354.45361328125, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -1.6277958154678345, "rewards/margins": 9.477842330932617, "rewards/rejected": -11.10563850402832, "step": 4460 }, { "epoch": 2.31, "learning_rate": 1.2822719449225472e-07, "logits/chosen": -2.363389253616333, "logits/rejected": -2.4042270183563232, "logps/chosen": -227.5398406982422, "logps/rejected": -313.44512939453125, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.8537158966064453, "rewards/margins": 8.813211441040039, "rewards/rejected": -10.666927337646484, "step": 4470 }, { "epoch": 2.31, "learning_rate": 1.2727098871677184e-07, "logits/chosen": -2.544398784637451, "logits/rejected": -2.509765625, "logps/chosen": -353.1852111816406, "logps/rejected": -389.2328186035156, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.6934762597084045, "rewards/margins": 9.873237609863281, "rewards/rejected": -10.5667142868042, "step": 4480 }, { "epoch": 2.32, "learning_rate": 1.2631478294128898e-07, "logits/chosen": -2.536165237426758, "logits/rejected": -2.4797139167785645, "logps/chosen": -280.58880615234375, "logps/rejected": -349.1954345703125, "loss": 0.0098, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.3528211116790771, "rewards/margins": 9.536347389221191, "rewards/rejected": -10.889168739318848, "step": 4490 }, { "epoch": 2.32, "learning_rate": 1.253585771658061e-07, "logits/chosen": -2.4254403114318848, "logits/rejected": -2.4034600257873535, "logps/chosen": -253.6884002685547, "logps/rejected": -346.30029296875, "loss": 0.0191, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.1951841115951538, "rewards/margins": 9.793797492980957, "rewards/rejected": -10.988981246948242, "step": 4500 }, { "epoch": 2.32, "eval_logits/chosen": -2.503566265106201, "eval_logits/rejected": -2.475616931915283, "eval_logps/chosen": -317.906005859375, "eval_logps/rejected": -308.5774230957031, "eval_loss": 0.7214946150779724, "eval_rewards/accuracies": 0.7620000243186951, "eval_rewards/chosen": -4.218255043029785, "eval_rewards/margins": 3.40336537361145, "eval_rewards/rejected": -7.621620178222656, "eval_runtime": 462.7177, "eval_samples_per_second": 4.322, "eval_steps_per_second": 0.27, "step": 4500 }, { "epoch": 2.33, "learning_rate": 1.2440237139032319e-07, "logits/chosen": -2.4602277278900146, "logits/rejected": -2.4032809734344482, "logps/chosen": -303.80816650390625, "logps/rejected": -327.02874755859375, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -1.4082835912704468, "rewards/margins": 9.613999366760254, "rewards/rejected": -11.022283554077148, "step": 4510 }, { "epoch": 2.33, "learning_rate": 1.234461656148403e-07, "logits/chosen": -2.486802816390991, "logits/rejected": -2.4940314292907715, "logps/chosen": -329.1534118652344, "logps/rejected": -371.4541015625, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.163053274154663, "rewards/margins": 10.781423568725586, "rewards/rejected": -11.944475173950195, "step": 4520 }, { "epoch": 2.34, "learning_rate": 1.2248995983935742e-07, "logits/chosen": -2.4711391925811768, "logits/rejected": -2.485156536102295, "logps/chosen": -306.0505065917969, "logps/rejected": -345.5367431640625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.9234344363212585, "rewards/margins": 10.126235961914062, "rewards/rejected": -11.049670219421387, "step": 4530 }, { "epoch": 2.34, "learning_rate": 1.2153375406387456e-07, "logits/chosen": -2.4170022010803223, "logits/rejected": -2.4166598320007324, "logps/chosen": -303.3820495605469, "logps/rejected": -380.8604736328125, "loss": 0.008, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.3916549682617188, "rewards/margins": 10.463330268859863, "rewards/rejected": -11.854985237121582, "step": 4540 }, { "epoch": 2.35, "learning_rate": 1.2057754828839165e-07, "logits/chosen": -2.471839189529419, "logits/rejected": -2.512411594390869, "logps/chosen": -277.19158935546875, "logps/rejected": -340.2514343261719, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.9067068099975586, "rewards/margins": 9.49809455871582, "rewards/rejected": -10.404802322387695, "step": 4550 }, { "epoch": 2.35, "learning_rate": 1.1962134251290876e-07, "logits/chosen": -2.356781482696533, "logits/rejected": -2.4188120365142822, "logps/chosen": -306.0680847167969, "logps/rejected": -314.0914001464844, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.8832162618637085, "rewards/margins": 8.965120315551758, "rewards/rejected": -10.848337173461914, "step": 4560 }, { "epoch": 2.36, "learning_rate": 1.1866513673742588e-07, "logits/chosen": -2.484847068786621, "logits/rejected": -2.363482713699341, "logps/chosen": -310.15118408203125, "logps/rejected": -303.5231628417969, "loss": 0.0168, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.3100998401641846, "rewards/margins": 8.249720573425293, "rewards/rejected": -9.559820175170898, "step": 4570 }, { "epoch": 2.36, "learning_rate": 1.1770893096194301e-07, "logits/chosen": -2.399364948272705, "logits/rejected": -2.4284517765045166, "logps/chosen": -300.3472595214844, "logps/rejected": -332.69671630859375, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.6987594366073608, "rewards/margins": 9.020197868347168, "rewards/rejected": -10.71895694732666, "step": 4580 }, { "epoch": 2.37, "learning_rate": 1.1675272518646012e-07, "logits/chosen": -2.4957313537597656, "logits/rejected": -2.409392833709717, "logps/chosen": -249.9640655517578, "logps/rejected": -335.85638427734375, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -1.2298882007598877, "rewards/margins": 9.339942932128906, "rewards/rejected": -10.569831848144531, "step": 4590 }, { "epoch": 2.37, "learning_rate": 1.1579651941097724e-07, "logits/chosen": -2.4879660606384277, "logits/rejected": -2.507378101348877, "logps/chosen": -326.9290466308594, "logps/rejected": -343.4966735839844, "loss": 0.0146, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.4674627780914307, "rewards/margins": 9.710068702697754, "rewards/rejected": -11.177530288696289, "step": 4600 }, { "epoch": 2.38, "learning_rate": 1.1484031363549436e-07, "logits/chosen": -2.391278028488159, "logits/rejected": -2.443850040435791, "logps/chosen": -320.7591247558594, "logps/rejected": -353.69281005859375, "loss": 0.0219, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -1.702275037765503, "rewards/margins": 10.41825008392334, "rewards/rejected": -12.120525360107422, "step": 4610 }, { "epoch": 2.39, "learning_rate": 1.1388410786001147e-07, "logits/chosen": -2.4807400703430176, "logits/rejected": -2.4674527645111084, "logps/chosen": -266.72100830078125, "logps/rejected": -361.2059631347656, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.4482429027557373, "rewards/margins": 9.929925918579102, "rewards/rejected": -11.378170013427734, "step": 4620 }, { "epoch": 2.39, "learning_rate": 1.1292790208452859e-07, "logits/chosen": -2.442647933959961, "logits/rejected": -2.4394030570983887, "logps/chosen": -243.0096893310547, "logps/rejected": -335.05023193359375, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -1.2735203504562378, "rewards/margins": 9.817770957946777, "rewards/rejected": -11.091290473937988, "step": 4630 }, { "epoch": 2.4, "learning_rate": 1.119716963090457e-07, "logits/chosen": -2.385737657546997, "logits/rejected": -2.3391549587249756, "logps/chosen": -265.28271484375, "logps/rejected": -349.02593994140625, "loss": 0.0125, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.0982563495635986, "rewards/margins": 9.236578941345215, "rewards/rejected": -11.33483600616455, "step": 4640 }, { "epoch": 2.4, "learning_rate": 1.1101549053356282e-07, "logits/chosen": -2.52477765083313, "logits/rejected": -2.4235565662384033, "logps/chosen": -298.9849548339844, "logps/rejected": -348.5369567871094, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -1.4369876384735107, "rewards/margins": 9.919187545776367, "rewards/rejected": -11.356176376342773, "step": 4650 }, { "epoch": 2.41, "learning_rate": 1.1005928475807993e-07, "logits/chosen": -2.469526767730713, "logits/rejected": -2.4453277587890625, "logps/chosen": -252.61422729492188, "logps/rejected": -331.5292663574219, "loss": 0.01, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.1783180236816406, "rewards/margins": 9.28120231628418, "rewards/rejected": -11.45952033996582, "step": 4660 }, { "epoch": 2.41, "learning_rate": 1.0910307898259705e-07, "logits/chosen": -2.3904144763946533, "logits/rejected": -2.4100112915039062, "logps/chosen": -267.6534729003906, "logps/rejected": -374.3344421386719, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -0.9095629453659058, "rewards/margins": 10.048948287963867, "rewards/rejected": -10.95850944519043, "step": 4670 }, { "epoch": 2.42, "learning_rate": 1.0814687320711418e-07, "logits/chosen": -2.421391010284424, "logits/rejected": -2.308657646179199, "logps/chosen": -240.39181518554688, "logps/rejected": -371.52642822265625, "loss": 0.0081, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2718487977981567, "rewards/margins": 10.701406478881836, "rewards/rejected": -11.97325611114502, "step": 4680 }, { "epoch": 2.42, "learning_rate": 1.0719066743163128e-07, "logits/chosen": -2.453456401824951, "logits/rejected": -2.4812474250793457, "logps/chosen": -338.2146911621094, "logps/rejected": -358.6303405761719, "loss": 0.0148, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.3517457246780396, "rewards/margins": 10.342467308044434, "rewards/rejected": -11.694211959838867, "step": 4690 }, { "epoch": 2.43, "learning_rate": 1.062344616561484e-07, "logits/chosen": -2.506812810897827, "logits/rejected": -2.4892067909240723, "logps/chosen": -294.2913818359375, "logps/rejected": -364.78564453125, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -1.3385236263275146, "rewards/margins": 10.63171672821045, "rewards/rejected": -11.970239639282227, "step": 4700 }, { "epoch": 2.43, "learning_rate": 1.0527825588066551e-07, "logits/chosen": -2.4706711769104004, "logits/rejected": -2.462312698364258, "logps/chosen": -301.4435119628906, "logps/rejected": -339.53076171875, "loss": 0.009, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.9904528856277466, "rewards/margins": 10.595855712890625, "rewards/rejected": -11.586308479309082, "step": 4710 }, { "epoch": 2.44, "learning_rate": 1.0432205010518264e-07, "logits/chosen": -2.417384624481201, "logits/rejected": -2.5014162063598633, "logps/chosen": -252.42526245117188, "logps/rejected": -357.0494079589844, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -0.7958747148513794, "rewards/margins": 10.516870498657227, "rewards/rejected": -11.312746047973633, "step": 4720 }, { "epoch": 2.44, "learning_rate": 1.0336584432969974e-07, "logits/chosen": -2.3254947662353516, "logits/rejected": -2.3634276390075684, "logps/chosen": -249.72042846679688, "logps/rejected": -326.01580810546875, "loss": 0.0111, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.8156722784042358, "rewards/margins": 9.60567855834961, "rewards/rejected": -10.421350479125977, "step": 4730 }, { "epoch": 2.45, "learning_rate": 1.0240963855421686e-07, "logits/chosen": -2.393904447555542, "logits/rejected": -2.289703607559204, "logps/chosen": -333.31494140625, "logps/rejected": -349.7557067871094, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.729089617729187, "rewards/margins": 9.651041030883789, "rewards/rejected": -10.38012981414795, "step": 4740 }, { "epoch": 2.45, "learning_rate": 1.0145343277873399e-07, "logits/chosen": -2.4490630626678467, "logits/rejected": -2.384364366531372, "logps/chosen": -324.8036804199219, "logps/rejected": -356.80157470703125, "loss": 0.0128, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.9558230638504028, "rewards/margins": 9.961849212646484, "rewards/rejected": -10.917671203613281, "step": 4750 }, { "epoch": 2.46, "learning_rate": 1.004972270032511e-07, "logits/chosen": -2.2176153659820557, "logits/rejected": -2.187372922897339, "logps/chosen": -258.55267333984375, "logps/rejected": -290.7706298828125, "loss": 0.0121, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.3212132453918457, "rewards/margins": 9.305283546447754, "rewards/rejected": -10.626497268676758, "step": 4760 }, { "epoch": 2.46, "learning_rate": 9.95410212277682e-08, "logits/chosen": -2.405513286590576, "logits/rejected": -2.4268226623535156, "logps/chosen": -293.0621032714844, "logps/rejected": -351.65557861328125, "loss": 0.0452, "rewards/accuracies": 1.0, "rewards/chosen": -1.2851110696792603, "rewards/margins": 9.79377555847168, "rewards/rejected": -11.078888893127441, "step": 4770 }, { "epoch": 2.47, "learning_rate": 9.858481545228532e-08, "logits/chosen": -2.388866901397705, "logits/rejected": -2.4224321842193604, "logps/chosen": -300.51416015625, "logps/rejected": -318.98834228515625, "loss": 0.0171, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.0021681785583496, "rewards/margins": 9.520451545715332, "rewards/rejected": -10.522619247436523, "step": 4780 }, { "epoch": 2.47, "learning_rate": 9.762860967680245e-08, "logits/chosen": -2.411489486694336, "logits/rejected": -2.3945744037628174, "logps/chosen": -295.2189025878906, "logps/rejected": -373.933837890625, "loss": 0.0129, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.4970338344573975, "rewards/margins": 10.310312271118164, "rewards/rejected": -11.80734634399414, "step": 4790 }, { "epoch": 2.48, "learning_rate": 9.667240390131957e-08, "logits/chosen": -2.500349760055542, "logits/rejected": -2.551851749420166, "logps/chosen": -313.2876892089844, "logps/rejected": -354.4496154785156, "loss": 0.0073, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.8869138956069946, "rewards/margins": 10.612105369567871, "rewards/rejected": -11.49902057647705, "step": 4800 }, { "epoch": 2.48, "learning_rate": 9.571619812583667e-08, "logits/chosen": -2.3907971382141113, "logits/rejected": -2.3780624866485596, "logps/chosen": -294.43145751953125, "logps/rejected": -354.15380859375, "loss": 0.0184, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.6661240458488464, "rewards/margins": 10.202630996704102, "rewards/rejected": -10.868753433227539, "step": 4810 }, { "epoch": 2.49, "learning_rate": 9.47599923503538e-08, "logits/chosen": -2.522645950317383, "logits/rejected": -2.5387370586395264, "logps/chosen": -282.9609069824219, "logps/rejected": -352.8865051269531, "loss": 0.0182, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.1960705518722534, "rewards/margins": 9.857826232910156, "rewards/rejected": -11.053895950317383, "step": 4820 }, { "epoch": 2.49, "learning_rate": 9.380378657487091e-08, "logits/chosen": -2.5203070640563965, "logits/rejected": -2.5354747772216797, "logps/chosen": -282.48968505859375, "logps/rejected": -340.5061340332031, "loss": 0.0188, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.3149895668029785, "rewards/margins": 10.316043853759766, "rewards/rejected": -11.631032943725586, "step": 4830 }, { "epoch": 2.5, "learning_rate": 9.284758079938803e-08, "logits/chosen": -2.5143141746520996, "logits/rejected": -2.5719997882843018, "logps/chosen": -290.4022216796875, "logps/rejected": -334.06939697265625, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.034603476524353, "rewards/margins": 9.760403633117676, "rewards/rejected": -10.795007705688477, "step": 4840 }, { "epoch": 2.5, "learning_rate": 9.189137502390513e-08, "logits/chosen": -2.513765335083008, "logits/rejected": -2.5539920330047607, "logps/chosen": -294.8843078613281, "logps/rejected": -402.92596435546875, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -2.0495996475219727, "rewards/margins": 9.208512306213379, "rewards/rejected": -11.258111953735352, "step": 4850 }, { "epoch": 2.51, "learning_rate": 9.093516924842226e-08, "logits/chosen": -2.563143253326416, "logits/rejected": -2.4798381328582764, "logps/chosen": -272.5045166015625, "logps/rejected": -361.7091979980469, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.7693915367126465, "rewards/margins": 10.272433280944824, "rewards/rejected": -11.041825294494629, "step": 4860 }, { "epoch": 2.51, "learning_rate": 8.997896347293938e-08, "logits/chosen": -2.549731492996216, "logits/rejected": -2.5603187084198, "logps/chosen": -293.55511474609375, "logps/rejected": -380.87969970703125, "loss": 0.0147, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2459195852279663, "rewards/margins": 9.440132141113281, "rewards/rejected": -10.686052322387695, "step": 4870 }, { "epoch": 2.52, "learning_rate": 8.902275769745648e-08, "logits/chosen": -2.4357972145080566, "logits/rejected": -2.5012993812561035, "logps/chosen": -261.88238525390625, "logps/rejected": -351.6615295410156, "loss": 0.0125, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.2481439113616943, "rewards/margins": 10.309308052062988, "rewards/rejected": -11.557451248168945, "step": 4880 }, { "epoch": 2.52, "learning_rate": 8.806655192197361e-08, "logits/chosen": -2.468061923980713, "logits/rejected": -2.406736373901367, "logps/chosen": -249.983154296875, "logps/rejected": -365.0889587402344, "loss": 0.0077, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.5393785238265991, "rewards/margins": 11.374832153320312, "rewards/rejected": -11.914213180541992, "step": 4890 }, { "epoch": 2.53, "learning_rate": 8.711034614649072e-08, "logits/chosen": -2.5306286811828613, "logits/rejected": -2.5501136779785156, "logps/chosen": -271.1980285644531, "logps/rejected": -366.566650390625, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -1.6836016178131104, "rewards/margins": 10.52708625793457, "rewards/rejected": -12.210687637329102, "step": 4900 }, { "epoch": 2.53, "learning_rate": 8.615414037100784e-08, "logits/chosen": -2.408055543899536, "logits/rejected": -2.4738125801086426, "logps/chosen": -352.69366455078125, "logps/rejected": -341.93359375, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.7806146144866943, "rewards/margins": 9.49925422668457, "rewards/rejected": -10.279869079589844, "step": 4910 }, { "epoch": 2.54, "learning_rate": 8.519793459552494e-08, "logits/chosen": -2.4429402351379395, "logits/rejected": -2.4843060970306396, "logps/chosen": -270.4953308105469, "logps/rejected": -339.0693054199219, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -0.785670816898346, "rewards/margins": 10.222851753234863, "rewards/rejected": -11.008522987365723, "step": 4920 }, { "epoch": 2.55, "learning_rate": 8.424172882004207e-08, "logits/chosen": -2.4533307552337646, "logits/rejected": -2.4679694175720215, "logps/chosen": -287.844482421875, "logps/rejected": -376.2936706542969, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": -0.5463220477104187, "rewards/margins": 9.996697425842285, "rewards/rejected": -10.543018341064453, "step": 4930 }, { "epoch": 2.55, "learning_rate": 8.328552304455919e-08, "logits/chosen": -2.5091729164123535, "logits/rejected": -2.5005977153778076, "logps/chosen": -256.46240234375, "logps/rejected": -319.3448181152344, "loss": 0.0136, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2730116844177246, "rewards/margins": 9.923606872558594, "rewards/rejected": -10.19661808013916, "step": 4940 }, { "epoch": 2.56, "learning_rate": 8.23293172690763e-08, "logits/chosen": -2.511229991912842, "logits/rejected": -2.446113109588623, "logps/chosen": -287.6640930175781, "logps/rejected": -343.26190185546875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.262495756149292, "rewards/margins": 10.62315559387207, "rewards/rejected": -10.885650634765625, "step": 4950 }, { "epoch": 2.56, "learning_rate": 8.137311149359343e-08, "logits/chosen": -2.594197988510132, "logits/rejected": -2.6038312911987305, "logps/chosen": -337.53973388671875, "logps/rejected": -340.3900451660156, "loss": 0.0061, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.43203097581863403, "rewards/margins": 10.693909645080566, "rewards/rejected": -11.125940322875977, "step": 4960 }, { "epoch": 2.57, "learning_rate": 8.041690571811053e-08, "logits/chosen": -2.512244939804077, "logits/rejected": -2.429426670074463, "logps/chosen": -290.28509521484375, "logps/rejected": -350.6308288574219, "loss": 0.0131, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.1895757913589478, "rewards/margins": 10.664027214050293, "rewards/rejected": -11.853602409362793, "step": 4970 }, { "epoch": 2.57, "learning_rate": 7.946069994262765e-08, "logits/chosen": -2.457437038421631, "logits/rejected": -2.4107303619384766, "logps/chosen": -266.69757080078125, "logps/rejected": -317.18695068359375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.8658391833305359, "rewards/margins": 9.216228485107422, "rewards/rejected": -10.082067489624023, "step": 4980 }, { "epoch": 2.58, "learning_rate": 7.850449416714476e-08, "logits/chosen": -2.501028537750244, "logits/rejected": -2.5449934005737305, "logps/chosen": -306.799560546875, "logps/rejected": -347.26971435546875, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.9260441660881042, "rewards/margins": 9.568941116333008, "rewards/rejected": -10.494985580444336, "step": 4990 }, { "epoch": 2.58, "learning_rate": 7.754828839166188e-08, "logits/chosen": -2.5003304481506348, "logits/rejected": -2.4379677772521973, "logps/chosen": -280.774658203125, "logps/rejected": -358.0619201660156, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -1.2388416528701782, "rewards/margins": 9.900052070617676, "rewards/rejected": -11.138894081115723, "step": 5000 }, { "epoch": 2.58, "eval_logits/chosen": -2.543748617172241, "eval_logits/rejected": -2.5156030654907227, "eval_logps/chosen": -318.33062744140625, "eval_logps/rejected": -309.8016052246094, "eval_loss": 0.7340511679649353, "eval_rewards/accuracies": 0.7599999904632568, "eval_rewards/chosen": -4.260715961456299, "eval_rewards/margins": 3.4833171367645264, "eval_rewards/rejected": -7.744033336639404, "eval_runtime": 462.765, "eval_samples_per_second": 4.322, "eval_steps_per_second": 0.27, "step": 5000 }, { "epoch": 2.59, "learning_rate": 7.6592082616179e-08, "logits/chosen": -2.523669481277466, "logits/rejected": -2.453369617462158, "logps/chosen": -326.9425354003906, "logps/rejected": -382.0200500488281, "loss": 0.0112, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6532198190689087, "rewards/margins": 10.079858779907227, "rewards/rejected": -11.733078002929688, "step": 5010 }, { "epoch": 2.59, "learning_rate": 7.563587684069611e-08, "logits/chosen": -2.41379451751709, "logits/rejected": -2.458996534347534, "logps/chosen": -296.97430419921875, "logps/rejected": -358.48809814453125, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.40497732162475586, "rewards/margins": 10.697502136230469, "rewards/rejected": -11.10247802734375, "step": 5020 }, { "epoch": 2.6, "learning_rate": 7.467967106521324e-08, "logits/chosen": -2.474517345428467, "logits/rejected": -2.508488178253174, "logps/chosen": -267.0328063964844, "logps/rejected": -270.98907470703125, "loss": 0.0116, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.9263556599617004, "rewards/margins": 9.430020332336426, "rewards/rejected": -10.356375694274902, "step": 5030 }, { "epoch": 2.6, "learning_rate": 7.372346528973034e-08, "logits/chosen": -2.5095248222351074, "logits/rejected": -2.512310743331909, "logps/chosen": -295.5697937011719, "logps/rejected": -332.3825988769531, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -1.4971369504928589, "rewards/margins": 9.871773719787598, "rewards/rejected": -11.36890983581543, "step": 5040 }, { "epoch": 2.61, "learning_rate": 7.276725951424746e-08, "logits/chosen": -2.4795002937316895, "logits/rejected": -2.3714241981506348, "logps/chosen": -272.24066162109375, "logps/rejected": -346.04986572265625, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -1.496202826499939, "rewards/margins": 9.852384567260742, "rewards/rejected": -11.348587989807129, "step": 5050 }, { "epoch": 2.61, "learning_rate": 7.181105373876457e-08, "logits/chosen": -2.4065327644348145, "logits/rejected": -2.5047264099121094, "logps/chosen": -276.74053955078125, "logps/rejected": -318.2535705566406, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.8203741908073425, "rewards/margins": 10.269460678100586, "rewards/rejected": -11.089835166931152, "step": 5060 }, { "epoch": 2.62, "learning_rate": 7.08548479632817e-08, "logits/chosen": -2.472975254058838, "logits/rejected": -2.4483237266540527, "logps/chosen": -320.4283752441406, "logps/rejected": -343.42498779296875, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -0.471790075302124, "rewards/margins": 10.54113483428955, "rewards/rejected": -11.012925148010254, "step": 5070 }, { "epoch": 2.62, "learning_rate": 6.98986421877988e-08, "logits/chosen": -2.5092146396636963, "logits/rejected": -2.537966012954712, "logps/chosen": -303.90447998046875, "logps/rejected": -370.0856018066406, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.359339714050293, "rewards/margins": 9.674978256225586, "rewards/rejected": -11.034318923950195, "step": 5080 }, { "epoch": 2.63, "learning_rate": 6.894243641231592e-08, "logits/chosen": -2.3674094676971436, "logits/rejected": -2.351562023162842, "logps/chosen": -286.71063232421875, "logps/rejected": -349.8485107421875, "loss": 0.0174, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.551408052444458, "rewards/margins": 10.066386222839355, "rewards/rejected": -11.617793083190918, "step": 5090 }, { "epoch": 2.63, "learning_rate": 6.798623063683305e-08, "logits/chosen": -2.399749279022217, "logits/rejected": -2.496814012527466, "logps/chosen": -293.72027587890625, "logps/rejected": -356.86395263671875, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -1.522544264793396, "rewards/margins": 11.700794219970703, "rewards/rejected": -13.223337173461914, "step": 5100 }, { "epoch": 2.64, "learning_rate": 6.703002486135017e-08, "logits/chosen": -2.3967783451080322, "logits/rejected": -2.476592779159546, "logps/chosen": -260.53924560546875, "logps/rejected": -364.6197204589844, "loss": 0.0223, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.540137529373169, "rewards/margins": 11.050385475158691, "rewards/rejected": -12.590522766113281, "step": 5110 }, { "epoch": 2.64, "learning_rate": 6.607381908586727e-08, "logits/chosen": -2.4503352642059326, "logits/rejected": -2.439924955368042, "logps/chosen": -297.86279296875, "logps/rejected": -390.11920166015625, "loss": 0.0201, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.1085379123687744, "rewards/margins": 10.787110328674316, "rewards/rejected": -11.895648002624512, "step": 5120 }, { "epoch": 2.65, "learning_rate": 6.511761331038438e-08, "logits/chosen": -2.5204012393951416, "logits/rejected": -2.4319424629211426, "logps/chosen": -248.1070556640625, "logps/rejected": -328.4260559082031, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -2.1627001762390137, "rewards/margins": 9.001168251037598, "rewards/rejected": -11.163866996765137, "step": 5130 }, { "epoch": 2.65, "learning_rate": 6.416140753490151e-08, "logits/chosen": -2.4711902141571045, "logits/rejected": -2.5176639556884766, "logps/chosen": -330.0944519042969, "logps/rejected": -382.4728088378906, "loss": 0.0131, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.061452865600586, "rewards/margins": 10.265039443969727, "rewards/rejected": -11.326491355895996, "step": 5140 }, { "epoch": 2.66, "learning_rate": 6.320520175941863e-08, "logits/chosen": -2.353011131286621, "logits/rejected": -2.3878352642059326, "logps/chosen": -275.98431396484375, "logps/rejected": -333.1819763183594, "loss": 0.0133, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6692678928375244, "rewards/margins": 9.493376731872559, "rewards/rejected": -11.162644386291504, "step": 5150 }, { "epoch": 2.66, "learning_rate": 6.224899598393573e-08, "logits/chosen": -2.5229997634887695, "logits/rejected": -2.5036749839782715, "logps/chosen": -331.75408935546875, "logps/rejected": -387.4117126464844, "loss": 0.0108, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.1152173280715942, "rewards/margins": 10.071974754333496, "rewards/rejected": -11.1871919631958, "step": 5160 }, { "epoch": 2.67, "learning_rate": 6.129279020845286e-08, "logits/chosen": -2.4617230892181396, "logits/rejected": -2.560640811920166, "logps/chosen": -277.8742370605469, "logps/rejected": -311.23907470703125, "loss": 0.0094, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.0628654956817627, "rewards/margins": 10.087770462036133, "rewards/rejected": -11.150636672973633, "step": 5170 }, { "epoch": 2.67, "learning_rate": 6.033658443296998e-08, "logits/chosen": -2.5219173431396484, "logits/rejected": -2.521574020385742, "logps/chosen": -306.21929931640625, "logps/rejected": -358.87322998046875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.613425612449646, "rewards/margins": 10.280952453613281, "rewards/rejected": -11.894378662109375, "step": 5180 }, { "epoch": 2.68, "learning_rate": 5.9380378657487085e-08, "logits/chosen": -2.5566489696502686, "logits/rejected": -2.5286972522735596, "logps/chosen": -304.65264892578125, "logps/rejected": -353.1256103515625, "loss": 0.005, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.8878873586654663, "rewards/margins": 10.550737380981445, "rewards/rejected": -11.438623428344727, "step": 5190 }, { "epoch": 2.68, "learning_rate": 5.842417288200421e-08, "logits/chosen": -2.5313048362731934, "logits/rejected": -2.540001392364502, "logps/chosen": -319.0318298339844, "logps/rejected": -362.86224365234375, "loss": 0.0089, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.8091020584106445, "rewards/margins": 10.437647819519043, "rewards/rejected": -11.246749877929688, "step": 5200 }, { "epoch": 2.69, "learning_rate": 5.7467967106521317e-08, "logits/chosen": -2.4078681468963623, "logits/rejected": -2.4756946563720703, "logps/chosen": -228.6869354248047, "logps/rejected": -344.0455627441406, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.5125707387924194, "rewards/margins": 10.272712707519531, "rewards/rejected": -11.78528118133545, "step": 5210 }, { "epoch": 2.69, "learning_rate": 5.651176133103844e-08, "logits/chosen": -2.577641010284424, "logits/rejected": -2.535780668258667, "logps/chosen": -287.9211120605469, "logps/rejected": -376.558349609375, "loss": 0.0136, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.5841994285583496, "rewards/margins": 11.391988754272461, "rewards/rejected": -11.976190567016602, "step": 5220 }, { "epoch": 2.7, "learning_rate": 5.555555555555555e-08, "logits/chosen": -2.3901116847991943, "logits/rejected": -2.4189741611480713, "logps/chosen": -281.49957275390625, "logps/rejected": -300.177001953125, "loss": 0.0139, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.406322717666626, "rewards/margins": 9.543334007263184, "rewards/rejected": -10.94965648651123, "step": 5230 }, { "epoch": 2.71, "learning_rate": 5.459934978007267e-08, "logits/chosen": -2.4920616149902344, "logits/rejected": -2.421905994415283, "logps/chosen": -297.64984130859375, "logps/rejected": -357.3204040527344, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.1165698766708374, "rewards/margins": 11.096078872680664, "rewards/rejected": -12.21264934539795, "step": 5240 }, { "epoch": 2.71, "learning_rate": 5.3643144004589786e-08, "logits/chosen": -2.469278573989868, "logits/rejected": -2.357398271560669, "logps/chosen": -338.11065673828125, "logps/rejected": -361.2065124511719, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.4490266740322113, "rewards/margins": 11.35706615447998, "rewards/rejected": -11.806093215942383, "step": 5250 }, { "epoch": 2.72, "learning_rate": 5.26869382291069e-08, "logits/chosen": -2.4685468673706055, "logits/rejected": -2.4665069580078125, "logps/chosen": -256.52935791015625, "logps/rejected": -312.5513916015625, "loss": 0.0425, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2890058755874634, "rewards/margins": 9.67116928100586, "rewards/rejected": -10.960175514221191, "step": 5260 }, { "epoch": 2.72, "learning_rate": 5.173073245362402e-08, "logits/chosen": -2.429325819015503, "logits/rejected": -2.451594829559326, "logps/chosen": -303.85986328125, "logps/rejected": -367.44830322265625, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.4256505966186523, "rewards/margins": 10.259121894836426, "rewards/rejected": -11.684773445129395, "step": 5270 }, { "epoch": 2.73, "learning_rate": 5.077452667814113e-08, "logits/chosen": -2.3756916522979736, "logits/rejected": -2.465467691421509, "logps/chosen": -260.43890380859375, "logps/rejected": -343.8309020996094, "loss": 0.0119, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6615362167358398, "rewards/margins": 10.869677543640137, "rewards/rejected": -12.531213760375977, "step": 5280 }, { "epoch": 2.73, "learning_rate": 4.981832090265825e-08, "logits/chosen": -2.505603790283203, "logits/rejected": -2.3853371143341064, "logps/chosen": -265.8559265136719, "logps/rejected": -338.4347229003906, "loss": 0.0214, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.0404921770095825, "rewards/margins": 10.665314674377441, "rewards/rejected": -11.705805778503418, "step": 5290 }, { "epoch": 2.74, "learning_rate": 4.8862115127175364e-08, "logits/chosen": -2.474071979522705, "logits/rejected": -2.5259792804718018, "logps/chosen": -314.3756103515625, "logps/rejected": -350.26910400390625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.8292062282562256, "rewards/margins": 9.556681632995605, "rewards/rejected": -11.38588809967041, "step": 5300 }, { "epoch": 2.74, "learning_rate": 4.790590935169248e-08, "logits/chosen": -2.52644419670105, "logits/rejected": -2.389683961868286, "logps/chosen": -261.18524169921875, "logps/rejected": -429.006591796875, "loss": 0.0202, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.313180923461914, "rewards/margins": 10.647405624389648, "rewards/rejected": -11.960586547851562, "step": 5310 }, { "epoch": 2.75, "learning_rate": 4.69497035762096e-08, "logits/chosen": -2.4015848636627197, "logits/rejected": -2.414374351501465, "logps/chosen": -263.62530517578125, "logps/rejected": -333.758544921875, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -2.166088819503784, "rewards/margins": 10.134454727172852, "rewards/rejected": -12.300543785095215, "step": 5320 }, { "epoch": 2.75, "learning_rate": 4.599349780072671e-08, "logits/chosen": -2.4161643981933594, "logits/rejected": -2.3900132179260254, "logps/chosen": -307.77691650390625, "logps/rejected": -320.50286865234375, "loss": 0.0129, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.1430935859680176, "rewards/margins": 10.124316215515137, "rewards/rejected": -11.267410278320312, "step": 5330 }, { "epoch": 2.76, "learning_rate": 4.5037292025243834e-08, "logits/chosen": -2.4651598930358887, "logits/rejected": -2.4811933040618896, "logps/chosen": -316.4070739746094, "logps/rejected": -383.3251953125, "loss": 0.01, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.1585972309112549, "rewards/margins": 9.961041450500488, "rewards/rejected": -11.119638442993164, "step": 5340 }, { "epoch": 2.76, "learning_rate": 4.408108624976094e-08, "logits/chosen": -2.570382595062256, "logits/rejected": -2.4643959999084473, "logps/chosen": -284.5560607910156, "logps/rejected": -375.60638427734375, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.8181749582290649, "rewards/margins": 11.522770881652832, "rewards/rejected": -12.340944290161133, "step": 5350 }, { "epoch": 2.77, "learning_rate": 4.3124880474278065e-08, "logits/chosen": -2.5256872177124023, "logits/rejected": -2.4669735431671143, "logps/chosen": -259.1166687011719, "logps/rejected": -323.52471923828125, "loss": 0.0206, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.9384600520133972, "rewards/margins": 9.923190116882324, "rewards/rejected": -10.861650466918945, "step": 5360 }, { "epoch": 2.77, "learning_rate": 4.2168674698795174e-08, "logits/chosen": -2.4286413192749023, "logits/rejected": -2.4100894927978516, "logps/chosen": -257.6455993652344, "logps/rejected": -365.386474609375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.3937734365463257, "rewards/margins": 10.573844909667969, "rewards/rejected": -11.967617988586426, "step": 5370 }, { "epoch": 2.78, "learning_rate": 4.1212468923312296e-08, "logits/chosen": -2.3846893310546875, "logits/rejected": -2.356142520904541, "logps/chosen": -294.99725341796875, "logps/rejected": -372.8056945800781, "loss": 0.0129, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.4807031154632568, "rewards/margins": 10.132972717285156, "rewards/rejected": -11.613676071166992, "step": 5380 }, { "epoch": 2.78, "learning_rate": 4.025626314782941e-08, "logits/chosen": -2.3499274253845215, "logits/rejected": -2.3001906871795654, "logps/chosen": -292.0154113769531, "logps/rejected": -337.46771240234375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.8701134920120239, "rewards/margins": 10.553464889526367, "rewards/rejected": -11.423578262329102, "step": 5390 }, { "epoch": 2.79, "learning_rate": 3.930005737234653e-08, "logits/chosen": -2.4598124027252197, "logits/rejected": -2.4664788246154785, "logps/chosen": -263.979248046875, "logps/rejected": -301.59130859375, "loss": 0.0147, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.568913221359253, "rewards/margins": 9.618475914001465, "rewards/rejected": -11.187389373779297, "step": 5400 }, { "epoch": 2.79, "learning_rate": 3.8343851596863644e-08, "logits/chosen": -2.4709832668304443, "logits/rejected": -2.414757490158081, "logps/chosen": -258.98486328125, "logps/rejected": -309.8785705566406, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.1285693645477295, "rewards/margins": 10.527348518371582, "rewards/rejected": -11.655917167663574, "step": 5410 }, { "epoch": 2.8, "learning_rate": 3.738764582138076e-08, "logits/chosen": -2.455583333969116, "logits/rejected": -2.375662088394165, "logps/chosen": -331.9709777832031, "logps/rejected": -379.05230712890625, "loss": 0.0132, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.4132988452911377, "rewards/margins": 10.533462524414062, "rewards/rejected": -11.946762084960938, "step": 5420 }, { "epoch": 2.8, "learning_rate": 3.6431440045897875e-08, "logits/chosen": -2.401982069015503, "logits/rejected": -2.4286303520202637, "logps/chosen": -284.4631042480469, "logps/rejected": -377.6039123535156, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.1193106174468994, "rewards/margins": 10.629063606262207, "rewards/rejected": -11.748373031616211, "step": 5430 }, { "epoch": 2.81, "learning_rate": 3.547523427041499e-08, "logits/chosen": -2.5329856872558594, "logits/rejected": -2.52848744392395, "logps/chosen": -295.4744567871094, "logps/rejected": -393.1733703613281, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.9103116989135742, "rewards/margins": 11.393068313598633, "rewards/rejected": -12.303380012512207, "step": 5440 }, { "epoch": 2.81, "learning_rate": 3.4519028494932106e-08, "logits/chosen": -2.411667823791504, "logits/rejected": -2.3320624828338623, "logps/chosen": -306.2268981933594, "logps/rejected": -367.2950439453125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.8251739740371704, "rewards/margins": 10.851397514343262, "rewards/rejected": -11.6765718460083, "step": 5450 }, { "epoch": 2.82, "learning_rate": 3.356282271944923e-08, "logits/chosen": -2.448438882827759, "logits/rejected": -2.454094409942627, "logps/chosen": -266.05267333984375, "logps/rejected": -370.8230895996094, "loss": 0.0156, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.932195782661438, "rewards/margins": 10.790741920471191, "rewards/rejected": -11.722939491271973, "step": 5460 }, { "epoch": 2.82, "learning_rate": 3.260661694396634e-08, "logits/chosen": -2.5592355728149414, "logits/rejected": -2.4593288898468018, "logps/chosen": -336.9600524902344, "logps/rejected": -338.29656982421875, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.5741435289382935, "rewards/margins": 10.8987455368042, "rewards/rejected": -11.472888946533203, "step": 5470 }, { "epoch": 2.83, "learning_rate": 3.165041116848346e-08, "logits/chosen": -2.4696829319000244, "logits/rejected": -2.4493181705474854, "logps/chosen": -275.49609375, "logps/rejected": -422.721923828125, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -1.2121615409851074, "rewards/margins": 11.010812759399414, "rewards/rejected": -12.22297477722168, "step": 5480 }, { "epoch": 2.83, "learning_rate": 3.0694205393000576e-08, "logits/chosen": -2.4667916297912598, "logits/rejected": -2.3578381538391113, "logps/chosen": -262.20989990234375, "logps/rejected": -345.19000244140625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.7185470461845398, "rewards/margins": 10.832283020019531, "rewards/rejected": -11.550830841064453, "step": 5490 }, { "epoch": 2.84, "learning_rate": 2.9737999617517688e-08, "logits/chosen": -2.38720965385437, "logits/rejected": -2.320250988006592, "logps/chosen": -308.02581787109375, "logps/rejected": -337.2959899902344, "loss": 0.0092, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.0859707593917847, "rewards/margins": 10.13597583770752, "rewards/rejected": -11.221944808959961, "step": 5500 }, { "epoch": 2.84, "eval_logits/chosen": -2.5149059295654297, "eval_logits/rejected": -2.4855728149414062, "eval_logps/chosen": -319.4794006347656, "eval_logps/rejected": -311.7966003417969, "eval_loss": 0.732962429523468, "eval_rewards/accuracies": 0.7599999904632568, "eval_rewards/chosen": -4.375596046447754, "eval_rewards/margins": 3.5679359436035156, "eval_rewards/rejected": -7.943531513214111, "eval_runtime": 463.3044, "eval_samples_per_second": 4.317, "eval_steps_per_second": 0.27, "step": 5500 }, { "epoch": 2.84, "learning_rate": 2.8781793842034804e-08, "logits/chosen": -2.3183465003967285, "logits/rejected": -2.3097920417785645, "logps/chosen": -262.3741455078125, "logps/rejected": -333.424072265625, "loss": 0.015, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.9249290227890015, "rewards/margins": 9.927213668823242, "rewards/rejected": -11.852142333984375, "step": 5510 }, { "epoch": 2.85, "learning_rate": 2.782558806655192e-08, "logits/chosen": -2.3864715099334717, "logits/rejected": -2.427034616470337, "logps/chosen": -284.58843994140625, "logps/rejected": -362.7284851074219, "loss": 0.0135, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.4909298419952393, "rewards/margins": 9.80453872680664, "rewards/rejected": -11.295469284057617, "step": 5520 }, { "epoch": 2.85, "learning_rate": 2.6869382291069035e-08, "logits/chosen": -2.4152259826660156, "logits/rejected": -2.5505833625793457, "logps/chosen": -306.852783203125, "logps/rejected": -364.6080017089844, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.235000491142273, "rewards/margins": 10.352682113647461, "rewards/rejected": -11.587682723999023, "step": 5530 }, { "epoch": 2.86, "learning_rate": 2.591317651558615e-08, "logits/chosen": -2.4661903381347656, "logits/rejected": -2.3790674209594727, "logps/chosen": -267.7341003417969, "logps/rejected": -350.3778991699219, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -1.195554256439209, "rewards/margins": 10.710996627807617, "rewards/rejected": -11.906549453735352, "step": 5540 }, { "epoch": 2.87, "learning_rate": 2.4956970740103267e-08, "logits/chosen": -2.466414213180542, "logits/rejected": -2.3303287029266357, "logps/chosen": -290.38360595703125, "logps/rejected": -394.87103271484375, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -1.4862712621688843, "rewards/margins": 10.932287216186523, "rewards/rejected": -12.418558120727539, "step": 5550 }, { "epoch": 2.87, "learning_rate": 2.4000764964620386e-08, "logits/chosen": -2.4945197105407715, "logits/rejected": -2.4668118953704834, "logps/chosen": -356.53009033203125, "logps/rejected": -385.39581298828125, "loss": 0.0193, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -1.3914432525634766, "rewards/margins": 9.552518844604492, "rewards/rejected": -10.943963050842285, "step": 5560 }, { "epoch": 2.88, "learning_rate": 2.30445591891375e-08, "logits/chosen": -2.5244903564453125, "logits/rejected": -2.4784955978393555, "logps/chosen": -332.05157470703125, "logps/rejected": -369.33013916015625, "loss": 0.0089, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.6902099251747131, "rewards/margins": 11.023591995239258, "rewards/rejected": -11.713800430297852, "step": 5570 }, { "epoch": 2.88, "learning_rate": 2.2088353413654617e-08, "logits/chosen": -2.3487985134124756, "logits/rejected": -2.35017728805542, "logps/chosen": -245.8054656982422, "logps/rejected": -308.2724914550781, "loss": 0.016, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.4316474199295044, "rewards/margins": 10.602240562438965, "rewards/rejected": -12.03388786315918, "step": 5580 }, { "epoch": 2.89, "learning_rate": 2.1132147638171733e-08, "logits/chosen": -2.430948257446289, "logits/rejected": -2.4136359691619873, "logps/chosen": -328.83123779296875, "logps/rejected": -336.42816162109375, "loss": 0.0124, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2018016129732132, "rewards/margins": 10.71995735168457, "rewards/rejected": -10.921757698059082, "step": 5590 }, { "epoch": 2.89, "learning_rate": 2.0175941862688848e-08, "logits/chosen": -2.4677605628967285, "logits/rejected": -2.4360225200653076, "logps/chosen": -256.86199951171875, "logps/rejected": -328.8375549316406, "loss": 0.0068, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.112344741821289, "rewards/margins": 10.564764022827148, "rewards/rejected": -11.677107810974121, "step": 5600 }, { "epoch": 2.9, "learning_rate": 1.9219736087205964e-08, "logits/chosen": -2.398508071899414, "logits/rejected": -2.424389123916626, "logps/chosen": -263.18658447265625, "logps/rejected": -337.9579772949219, "loss": 0.0184, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.4862511157989502, "rewards/margins": 9.879942893981934, "rewards/rejected": -11.366193771362305, "step": 5610 }, { "epoch": 2.9, "learning_rate": 1.826353031172308e-08, "logits/chosen": -2.363523483276367, "logits/rejected": -2.4009668827056885, "logps/chosen": -320.4283142089844, "logps/rejected": -435.572509765625, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -0.7103533148765564, "rewards/margins": 11.305900573730469, "rewards/rejected": -12.016253471374512, "step": 5620 }, { "epoch": 2.91, "learning_rate": 1.73073245362402e-08, "logits/chosen": -2.418933391571045, "logits/rejected": -2.3981566429138184, "logps/chosen": -300.5345764160156, "logps/rejected": -324.4273376464844, "loss": 0.018, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.3486359119415283, "rewards/margins": 10.127408981323242, "rewards/rejected": -11.476043701171875, "step": 5630 }, { "epoch": 2.91, "learning_rate": 1.6351118760757314e-08, "logits/chosen": -2.4389071464538574, "logits/rejected": -2.330331802368164, "logps/chosen": -280.1732482910156, "logps/rejected": -337.8651123046875, "loss": 0.0103, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2679827213287354, "rewards/margins": 9.86819076538086, "rewards/rejected": -11.136173248291016, "step": 5640 }, { "epoch": 2.92, "learning_rate": 1.539491298527443e-08, "logits/chosen": -2.44217586517334, "logits/rejected": -2.413456678390503, "logps/chosen": -247.275146484375, "logps/rejected": -348.5274963378906, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.907915711402893, "rewards/margins": 9.879022598266602, "rewards/rejected": -11.786938667297363, "step": 5650 }, { "epoch": 2.92, "learning_rate": 1.4438707209791546e-08, "logits/chosen": -2.368377685546875, "logits/rejected": -2.5231080055236816, "logps/chosen": -306.4520263671875, "logps/rejected": -321.46710205078125, "loss": 0.0285, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.2531068325042725, "rewards/margins": 9.472089767456055, "rewards/rejected": -10.72519588470459, "step": 5660 }, { "epoch": 2.93, "learning_rate": 1.3482501434308661e-08, "logits/chosen": -2.4074292182922363, "logits/rejected": -2.306896448135376, "logps/chosen": -296.39984130859375, "logps/rejected": -327.9219055175781, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -1.2679624557495117, "rewards/margins": 9.967606544494629, "rewards/rejected": -11.235568046569824, "step": 5670 }, { "epoch": 2.93, "learning_rate": 1.2526295658825777e-08, "logits/chosen": -2.5389842987060547, "logits/rejected": -2.484412908554077, "logps/chosen": -302.4338684082031, "logps/rejected": -372.11822509765625, "loss": 0.0147, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.3193234205245972, "rewards/margins": 10.425549507141113, "rewards/rejected": -11.744871139526367, "step": 5680 }, { "epoch": 2.94, "learning_rate": 1.1570089883342895e-08, "logits/chosen": -2.3705217838287354, "logits/rejected": -2.409170389175415, "logps/chosen": -312.36614990234375, "logps/rejected": -406.3182678222656, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.161109209060669, "rewards/margins": 11.536172866821289, "rewards/rejected": -12.697282791137695, "step": 5690 }, { "epoch": 2.94, "learning_rate": 1.061388410786001e-08, "logits/chosen": -2.424706220626831, "logits/rejected": -2.459883213043213, "logps/chosen": -288.7680969238281, "logps/rejected": -316.0674743652344, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.269592523574829, "rewards/margins": 9.58020305633545, "rewards/rejected": -10.849796295166016, "step": 5700 }, { "epoch": 2.95, "learning_rate": 9.657678332377126e-09, "logits/chosen": -2.3998100757598877, "logits/rejected": -2.329770565032959, "logps/chosen": -285.0074157714844, "logps/rejected": -316.50445556640625, "loss": 0.0137, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.7311685085296631, "rewards/margins": 9.935151100158691, "rewards/rejected": -10.6663179397583, "step": 5710 }, { "epoch": 2.95, "learning_rate": 8.701472556894243e-09, "logits/chosen": -2.3474459648132324, "logits/rejected": -2.4268414974212646, "logps/chosen": -265.39727783203125, "logps/rejected": -366.6424865722656, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.919445276260376, "rewards/margins": 10.569762229919434, "rewards/rejected": -12.489209175109863, "step": 5720 }, { "epoch": 2.96, "learning_rate": 7.745266781411359e-09, "logits/chosen": -2.407890796661377, "logits/rejected": -2.4742534160614014, "logps/chosen": -316.9874572753906, "logps/rejected": -378.4074401855469, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -0.8994172811508179, "rewards/margins": 10.128255844116211, "rewards/rejected": -11.027674674987793, "step": 5730 }, { "epoch": 2.96, "learning_rate": 6.7890610059284754e-09, "logits/chosen": -2.4006435871124268, "logits/rejected": -2.4294021129608154, "logps/chosen": -243.5701446533203, "logps/rejected": -294.20233154296875, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.2013685703277588, "rewards/margins": 9.605109214782715, "rewards/rejected": -10.806478500366211, "step": 5740 }, { "epoch": 2.97, "learning_rate": 5.832855230445592e-09, "logits/chosen": -2.395141124725342, "logits/rejected": -2.4526638984680176, "logps/chosen": -296.9883117675781, "logps/rejected": -336.3964538574219, "loss": 0.0085, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.263058066368103, "rewards/margins": 10.24799633026123, "rewards/rejected": -11.511053085327148, "step": 5750 }, { "epoch": 2.97, "learning_rate": 4.8766494549627085e-09, "logits/chosen": -2.397413969039917, "logits/rejected": -2.379274368286133, "logps/chosen": -286.8966064453125, "logps/rejected": -352.62103271484375, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -0.2679819166660309, "rewards/margins": 11.343156814575195, "rewards/rejected": -11.611139297485352, "step": 5760 }, { "epoch": 2.98, "learning_rate": 3.920443679479824e-09, "logits/chosen": -2.501077175140381, "logits/rejected": -2.474958896636963, "logps/chosen": -324.0655822753906, "logps/rejected": -336.5184020996094, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -1.2807409763336182, "rewards/margins": 9.82066535949707, "rewards/rejected": -11.10140609741211, "step": 5770 }, { "epoch": 2.98, "learning_rate": 2.96423790399694e-09, "logits/chosen": -2.4477200508117676, "logits/rejected": -2.471569538116455, "logps/chosen": -294.63897705078125, "logps/rejected": -357.42059326171875, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -1.2787177562713623, "rewards/margins": 9.29156494140625, "rewards/rejected": -10.570282936096191, "step": 5780 }, { "epoch": 2.99, "learning_rate": 2.008032128514056e-09, "logits/chosen": -2.3351516723632812, "logits/rejected": -2.407452344894409, "logps/chosen": -300.2392883300781, "logps/rejected": -361.8913269042969, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.1922436952590942, "rewards/margins": 9.98654556274414, "rewards/rejected": -11.178790092468262, "step": 5790 }, { "epoch": 2.99, "learning_rate": 1.0518263530311723e-09, "logits/chosen": -2.4000933170318604, "logits/rejected": -2.4726662635803223, "logps/chosen": -244.21127319335938, "logps/rejected": -344.6064453125, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.5048989057540894, "rewards/margins": 9.535536766052246, "rewards/rejected": -11.040433883666992, "step": 5800 }, { "epoch": 3.0, "learning_rate": 9.562057754828839e-11, "logits/chosen": -2.3397300243377686, "logits/rejected": -2.444467067718506, "logps/chosen": -252.40072631835938, "logps/rejected": -342.96197509765625, "loss": 0.0231, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.8680833578109741, "rewards/margins": 9.311616897583008, "rewards/rejected": -10.17970085144043, "step": 5810 }, { "epoch": 3.0, "step": 5811, "total_flos": 0.0, "train_loss": 0.21807545795603994, "train_runtime": 74953.8161, "train_samples_per_second": 2.48, "train_steps_per_second": 0.078 } ], "logging_steps": 10, "max_steps": 5811, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }