{ "best_metric": 0.573898196220398, "best_model_checkpoint": "data/tinyllama_moe_dpo_ultrafeedback_v2_epochs5/checkpoint-3300", "epoch": 4.998953427524856, "eval_steps": 100, "global_step": 4775, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.208333333333333e-09, "logits/chosen": -2.7229816913604736, "logits/rejected": -2.704376220703125, "logps/chosen": -295.48358154296875, "logps/rejected": -277.29522705078125, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 5.208333333333333e-08, "logits/chosen": -2.7768375873565674, "logits/rejected": -2.6537435054779053, "logps/chosen": -356.50335693359375, "logps/rejected": -288.44366455078125, "loss": 0.6934, "rewards/accuracies": 0.4236111044883728, "rewards/chosen": -0.0006966523360460997, "rewards/margins": -0.0007656050729565322, "rewards/rejected": 6.895273691043258e-05, "step": 10 }, { "epoch": 0.02, "learning_rate": 1.0416666666666667e-07, "logits/chosen": -2.7214996814727783, "logits/rejected": -2.6908183097839355, "logps/chosen": -313.5826721191406, "logps/rejected": -281.9164733886719, "loss": 0.6931, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": 0.00022428599186241627, "rewards/margins": 0.000738097180146724, "rewards/rejected": -0.0005138111882843077, "step": 20 }, { "epoch": 0.03, "learning_rate": 1.5624999999999999e-07, "logits/chosen": -2.7791266441345215, "logits/rejected": -2.7023978233337402, "logps/chosen": -346.8282165527344, "logps/rejected": -305.5320739746094, "loss": 0.693, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.00040574674494564533, "rewards/margins": -0.00035077956272289157, "rewards/rejected": 0.0007565263076685369, "step": 30 }, { "epoch": 0.04, "learning_rate": 2.0833333333333333e-07, "logits/chosen": -2.7579116821289062, "logits/rejected": -2.6938705444335938, "logps/chosen": -336.7049865722656, "logps/rejected": -282.226806640625, "loss": 0.6935, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -4.12855988543015e-05, "rewards/margins": 0.0006455664406530559, "rewards/rejected": -0.0006868520868010819, "step": 40 }, { "epoch": 0.05, "learning_rate": 2.604166666666667e-07, "logits/chosen": -2.773176670074463, "logits/rejected": -2.7138824462890625, "logps/chosen": -352.06036376953125, "logps/rejected": -314.73699951171875, "loss": 0.6927, "rewards/accuracies": 0.543749988079071, "rewards/chosen": 0.001099282642826438, "rewards/margins": 0.0008529500337317586, "rewards/rejected": 0.0002463326381985098, "step": 50 }, { "epoch": 0.06, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -2.752551555633545, "logits/rejected": -2.667330265045166, "logps/chosen": -353.0582275390625, "logps/rejected": -323.7419738769531, "loss": 0.6928, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": 0.0003327417653053999, "rewards/margins": 5.731172677769791e-06, "rewards/rejected": 0.000327010580804199, "step": 60 }, { "epoch": 0.07, "learning_rate": 3.645833333333333e-07, "logits/chosen": -2.8295810222625732, "logits/rejected": -2.751282215118408, "logps/chosen": -387.351318359375, "logps/rejected": -340.2878112792969, "loss": 0.6928, "rewards/accuracies": 0.46875, "rewards/chosen": 0.0006954811397008598, "rewards/margins": -0.0006631066789850593, "rewards/rejected": 0.00135858787689358, "step": 70 }, { "epoch": 0.08, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -2.6580421924591064, "logits/rejected": -2.5816047191619873, "logps/chosen": -359.86114501953125, "logps/rejected": -300.00640869140625, "loss": 0.6926, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0022001974284648895, "rewards/margins": 0.0019074224401265383, "rewards/rejected": 0.00029277493013069034, "step": 80 }, { "epoch": 0.09, "learning_rate": 4.6874999999999996e-07, "logits/chosen": -2.7795090675354004, "logits/rejected": -2.7111852169036865, "logps/chosen": -353.28106689453125, "logps/rejected": -316.5885314941406, "loss": 0.6918, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.003978191874921322, "rewards/margins": 0.004179838579148054, "rewards/rejected": -0.00020164628222119063, "step": 90 }, { "epoch": 0.1, "learning_rate": 4.999990983803055e-07, "logits/chosen": -2.7545604705810547, "logits/rejected": -2.7175841331481934, "logps/chosen": -371.634765625, "logps/rejected": -347.4078674316406, "loss": 0.6913, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0036698828916996717, "rewards/margins": 0.0029316016007214785, "rewards/rejected": 0.0007382815820164979, "step": 100 }, { "epoch": 0.1, "eval_logits/chosen": -2.7888541221618652, "eval_logits/rejected": -2.717860698699951, "eval_logps/chosen": -348.8463134765625, "eval_logps/rejected": -307.7887268066406, "eval_loss": 0.6915069818496704, "eval_rewards/accuracies": 0.601190447807312, "eval_rewards/chosen": 0.005134147591888905, "eval_rewards/margins": 0.004059688653796911, "eval_rewards/rejected": 0.0010744588216766715, "eval_runtime": 351.1264, "eval_samples_per_second": 5.696, "eval_steps_per_second": 0.179, "step": 100 }, { "epoch": 0.12, "learning_rate": 4.999889552334295e-07, "logits/chosen": -2.7624781131744385, "logits/rejected": -2.6426429748535156, "logps/chosen": -319.4280700683594, "logps/rejected": -255.9808807373047, "loss": 0.691, "rewards/accuracies": 0.59375, "rewards/chosen": 0.003979907371103764, "rewards/margins": 0.0030905543826520443, "rewards/rejected": 0.0008893535705283284, "step": 110 }, { "epoch": 0.13, "learning_rate": 4.999675423738452e-07, "logits/chosen": -2.739222764968872, "logits/rejected": -2.634364128112793, "logps/chosen": -365.7749938964844, "logps/rejected": -293.59381103515625, "loss": 0.6903, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.008735042065382004, "rewards/margins": 0.008576452732086182, "rewards/rejected": 0.00015858971164561808, "step": 120 }, { "epoch": 0.14, "learning_rate": 4.999348607668623e-07, "logits/chosen": -2.7971653938293457, "logits/rejected": -2.7021219730377197, "logps/chosen": -385.40155029296875, "logps/rejected": -314.69622802734375, "loss": 0.6895, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.009917149320244789, "rewards/margins": 0.00689274538308382, "rewards/rejected": 0.0030244034714996815, "step": 130 }, { "epoch": 0.15, "learning_rate": 4.998909118857952e-07, "logits/chosen": -2.7118449211120605, "logits/rejected": -2.6747097969055176, "logps/chosen": -291.15789794921875, "logps/rejected": -265.5232849121094, "loss": 0.6892, "rewards/accuracies": 0.625, "rewards/chosen": 0.010748682543635368, "rewards/margins": 0.0077992090955376625, "rewards/rejected": 0.002949473215267062, "step": 140 }, { "epoch": 0.16, "learning_rate": 4.998356977118967e-07, "logits/chosen": -2.7854163646698, "logits/rejected": -2.7411043643951416, "logps/chosen": -341.4150695800781, "logps/rejected": -339.93988037109375, "loss": 0.6895, "rewards/accuracies": 0.59375, "rewards/chosen": 0.008866357617080212, "rewards/margins": 0.002817091066390276, "rewards/rejected": 0.006049267947673798, "step": 150 }, { "epoch": 0.17, "learning_rate": 4.99769220734268e-07, "logits/chosen": -2.793144464492798, "logits/rejected": -2.7030389308929443, "logps/chosen": -357.8925476074219, "logps/rejected": -336.72650146484375, "loss": 0.6872, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": 0.014735780656337738, "rewards/margins": 0.01035328023135662, "rewards/rejected": 0.004382501356303692, "step": 160 }, { "epoch": 0.18, "learning_rate": 4.996914839497473e-07, "logits/chosen": -2.7929883003234863, "logits/rejected": -2.7215566635131836, "logps/chosen": -330.4803161621094, "logps/rejected": -284.5147705078125, "loss": 0.6865, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.0183271374553442, "rewards/margins": 0.013379251584410667, "rewards/rejected": 0.004947885405272245, "step": 170 }, { "epoch": 0.19, "learning_rate": 4.996024908627745e-07, "logits/chosen": -2.7179646492004395, "logits/rejected": -2.629631280899048, "logps/chosen": -302.9635009765625, "logps/rejected": -271.3373107910156, "loss": 0.6859, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.01472156960517168, "rewards/margins": 0.016796987503767014, "rewards/rejected": -0.0020754183642566204, "step": 180 }, { "epoch": 0.2, "learning_rate": 4.995022454852327e-07, "logits/chosen": -2.793166160583496, "logits/rejected": -2.6981942653656006, "logps/chosen": -346.3775939941406, "logps/rejected": -303.22491455078125, "loss": 0.6852, "rewards/accuracies": 0.625, "rewards/chosen": 0.0220264233648777, "rewards/margins": 0.01690804772078991, "rewards/rejected": 0.005118372850120068, "step": 190 }, { "epoch": 0.21, "learning_rate": 4.993907523362682e-07, "logits/chosen": -2.7156126499176025, "logits/rejected": -2.6670401096343994, "logps/chosen": -347.8214416503906, "logps/rejected": -319.7621154785156, "loss": 0.6848, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.02535415254533291, "rewards/margins": 0.01954091526567936, "rewards/rejected": 0.005813241004943848, "step": 200 }, { "epoch": 0.21, "eval_logits/chosen": -2.778569221496582, "eval_logits/rejected": -2.706406593322754, "eval_logps/chosen": -347.1147766113281, "eval_logps/rejected": -307.7813720703125, "eval_loss": 0.6843611001968384, "eval_rewards/accuracies": 0.6547619104385376, "eval_rewards/chosen": 0.022449664771556854, "eval_rewards/margins": 0.02130187302827835, "eval_rewards/rejected": 0.0011477925581857562, "eval_runtime": 354.9846, "eval_samples_per_second": 5.634, "eval_steps_per_second": 0.177, "step": 200 }, { "epoch": 0.22, "learning_rate": 4.992680164420859e-07, "logits/chosen": -2.7765281200408936, "logits/rejected": -2.670767068862915, "logps/chosen": -370.70208740234375, "logps/rejected": -300.8094177246094, "loss": 0.6842, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.022380836308002472, "rewards/margins": 0.020519474521279335, "rewards/rejected": 0.001861358410678804, "step": 210 }, { "epoch": 0.23, "learning_rate": 4.991340433357235e-07, "logits/chosen": -2.776369571685791, "logits/rejected": -2.6916940212249756, "logps/chosen": -353.1146545410156, "logps/rejected": -323.1269226074219, "loss": 0.6831, "rewards/accuracies": 0.625, "rewards/chosen": 0.027633443474769592, "rewards/margins": 0.02584686316549778, "rewards/rejected": 0.0017865825211629272, "step": 220 }, { "epoch": 0.24, "learning_rate": 4.989888390568014e-07, "logits/chosen": -2.739046812057495, "logits/rejected": -2.6546576023101807, "logps/chosen": -353.8674621582031, "logps/rejected": -290.97503662109375, "loss": 0.6788, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.023385953158140182, "rewards/margins": 0.026143008843064308, "rewards/rejected": -0.002757056849077344, "step": 230 }, { "epoch": 0.25, "learning_rate": 4.988324101512507e-07, "logits/chosen": -2.730693817138672, "logits/rejected": -2.6349058151245117, "logps/chosen": -338.3262634277344, "logps/rejected": -270.018798828125, "loss": 0.6805, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.027155738323926926, "rewards/margins": 0.025723371654748917, "rewards/rejected": 0.0014323694631457329, "step": 240 }, { "epoch": 0.26, "learning_rate": 4.986647636710183e-07, "logits/chosen": -2.7343668937683105, "logits/rejected": -2.7023162841796875, "logps/chosen": -322.39031982421875, "logps/rejected": -322.66693115234375, "loss": 0.6811, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.02547827921807766, "rewards/margins": 0.026273246854543686, "rewards/rejected": -0.0007949693244881928, "step": 250 }, { "epoch": 0.27, "learning_rate": 4.984859071737489e-07, "logits/chosen": -2.7031362056732178, "logits/rejected": -2.6224429607391357, "logps/chosen": -345.26470947265625, "logps/rejected": -309.10003662109375, "loss": 0.679, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.025715002790093422, "rewards/margins": 0.036292947828769684, "rewards/rejected": -0.010577939450740814, "step": 260 }, { "epoch": 0.28, "learning_rate": 4.982958487224441e-07, "logits/chosen": -2.809894323348999, "logits/rejected": -2.717299699783325, "logps/chosen": -356.2657165527344, "logps/rejected": -297.43341064453125, "loss": 0.6773, "rewards/accuracies": 0.71875, "rewards/chosen": 0.03299617022275925, "rewards/margins": 0.04462386667728424, "rewards/rejected": -0.011627699248492718, "step": 270 }, { "epoch": 0.29, "learning_rate": 4.980945968850989e-07, "logits/chosen": -2.7708637714385986, "logits/rejected": -2.7318742275238037, "logps/chosen": -355.09332275390625, "logps/rejected": -334.1681213378906, "loss": 0.6789, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.022588271647691727, "rewards/margins": 0.02767338789999485, "rewards/rejected": -0.0050851134583354, "step": 280 }, { "epoch": 0.3, "learning_rate": 4.978821607343156e-07, "logits/chosen": -2.7207131385803223, "logits/rejected": -2.6686415672302246, "logps/chosen": -339.83685302734375, "logps/rejected": -300.10791015625, "loss": 0.6748, "rewards/accuracies": 0.625, "rewards/chosen": 0.024562764912843704, "rewards/margins": 0.03406853228807449, "rewards/rejected": -0.009505772963166237, "step": 290 }, { "epoch": 0.31, "learning_rate": 4.976585498468949e-07, "logits/chosen": -2.750870704650879, "logits/rejected": -2.6027140617370605, "logps/chosen": -343.34881591796875, "logps/rejected": -281.9125671386719, "loss": 0.6719, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.02178148925304413, "rewards/margins": 0.04298964887857437, "rewards/rejected": -0.021208161488175392, "step": 300 }, { "epoch": 0.31, "eval_logits/chosen": -2.756394147872925, "eval_logits/rejected": -2.682809829711914, "eval_logps/chosen": -347.1925964355469, "eval_logps/rejected": -310.327392578125, "eval_loss": 0.6745370030403137, "eval_rewards/accuracies": 0.6567460298538208, "eval_rewards/chosen": 0.021671386435627937, "eval_rewards/margins": 0.04598393663764, "eval_rewards/rejected": -0.024312546476721764, "eval_runtime": 370.5051, "eval_samples_per_second": 5.398, "eval_steps_per_second": 0.17, "step": 300 }, { "epoch": 0.32, "learning_rate": 4.974237743034041e-07, "logits/chosen": -2.6656975746154785, "logits/rejected": -2.628554582595825, "logps/chosen": -343.7931213378906, "logps/rejected": -320.90826416015625, "loss": 0.6684, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": 0.0219754446297884, "rewards/margins": 0.06229216977953911, "rewards/rejected": -0.04031673073768616, "step": 310 }, { "epoch": 0.33, "learning_rate": 4.971778446877224e-07, "logits/chosen": -2.688197374343872, "logits/rejected": -2.6364893913269043, "logps/chosen": -334.7703857421875, "logps/rejected": -318.79986572265625, "loss": 0.6706, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.02248135581612587, "rewards/margins": 0.0489434115588665, "rewards/rejected": -0.02646205946803093, "step": 320 }, { "epoch": 0.35, "learning_rate": 4.96920772086564e-07, "logits/chosen": -2.6598384380340576, "logits/rejected": -2.589719533920288, "logps/chosen": -335.1375427246094, "logps/rejected": -277.85833740234375, "loss": 0.6743, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.025626670569181442, "rewards/margins": 0.04879312217235565, "rewards/rejected": -0.023166455328464508, "step": 330 }, { "epoch": 0.36, "learning_rate": 4.966525680889785e-07, "logits/chosen": -2.6839308738708496, "logits/rejected": -2.6098990440368652, "logps/chosen": -296.1307067871094, "logps/rejected": -268.22125244140625, "loss": 0.674, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.0010839557508006692, "rewards/margins": 0.031559232622385025, "rewards/rejected": -0.032643191516399384, "step": 340 }, { "epoch": 0.37, "learning_rate": 4.963732447858279e-07, "logits/chosen": -2.65653133392334, "logits/rejected": -2.650408983230591, "logps/chosen": -334.28076171875, "logps/rejected": -332.80877685546875, "loss": 0.6724, "rewards/accuracies": 0.625, "rewards/chosen": -0.0002481082337908447, "rewards/margins": 0.046056605875492096, "rewards/rejected": -0.046304717659950256, "step": 350 }, { "epoch": 0.38, "learning_rate": 4.960828147692421e-07, "logits/chosen": -2.7294137477874756, "logits/rejected": -2.6597273349761963, "logps/chosen": -334.6825256347656, "logps/rejected": -288.56488037109375, "loss": 0.6662, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.012845727615058422, "rewards/margins": 0.04838230460882187, "rewards/rejected": -0.06122802942991257, "step": 360 }, { "epoch": 0.39, "learning_rate": 4.957812911320509e-07, "logits/chosen": -2.6296286582946777, "logits/rejected": -2.6109249591827393, "logps/chosen": -287.88055419921875, "logps/rejected": -300.3601989746094, "loss": 0.6645, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.01973501779139042, "rewards/margins": 0.057510875165462494, "rewards/rejected": -0.07724590599536896, "step": 370 }, { "epoch": 0.4, "learning_rate": 4.95468687467194e-07, "logits/chosen": -2.7505180835723877, "logits/rejected": -2.6816720962524414, "logps/chosen": -361.86358642578125, "logps/rejected": -319.72576904296875, "loss": 0.6666, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.012716621160507202, "rewards/margins": 0.06772245466709137, "rewards/rejected": -0.08043907582759857, "step": 380 }, { "epoch": 0.41, "learning_rate": 4.951450178671078e-07, "logits/chosen": -2.6552157402038574, "logits/rejected": -2.5710456371307373, "logps/chosen": -332.8970031738281, "logps/rejected": -284.53131103515625, "loss": 0.6676, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0116237448528409, "rewards/margins": 0.06472723931074142, "rewards/rejected": -0.07635099440813065, "step": 390 }, { "epoch": 0.42, "learning_rate": 4.948102969230907e-07, "logits/chosen": -2.7454886436462402, "logits/rejected": -2.6737310886383057, "logps/chosen": -372.53106689453125, "logps/rejected": -322.9483337402344, "loss": 0.6593, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.01839020662009716, "rewards/margins": 0.07976034283638, "rewards/rejected": -0.09815056622028351, "step": 400 }, { "epoch": 0.42, "eval_logits/chosen": -2.7167913913726807, "eval_logits/rejected": -2.641690731048584, "eval_logps/chosen": -351.2079162597656, "eval_logps/rejected": -317.75079345703125, "eval_loss": 0.662617564201355, "eval_rewards/accuracies": 0.6626983880996704, "eval_rewards/chosen": -0.01848192885518074, "eval_rewards/margins": 0.08006466180086136, "eval_rewards/rejected": -0.0985465869307518, "eval_runtime": 329.4056, "eval_samples_per_second": 6.072, "eval_steps_per_second": 0.191, "step": 400 }, { "epoch": 0.43, "learning_rate": 4.944645397246446e-07, "logits/chosen": -2.7801225185394287, "logits/rejected": -2.722992420196533, "logps/chosen": -375.0428771972656, "logps/rejected": -349.60369873046875, "loss": 0.6599, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.00478482898324728, "rewards/margins": 0.08646519482135773, "rewards/rejected": -0.09125002473592758, "step": 410 }, { "epoch": 0.44, "learning_rate": 4.941077618587955e-07, "logits/chosen": -2.634456157684326, "logits/rejected": -2.5576937198638916, "logps/chosen": -313.3975524902344, "logps/rejected": -267.78460693359375, "loss": 0.6589, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.017164412885904312, "rewards/margins": 0.07831953465938568, "rewards/rejected": -0.0954839438199997, "step": 420 }, { "epoch": 0.45, "learning_rate": 4.937399794093903e-07, "logits/chosen": -2.6605842113494873, "logits/rejected": -2.618790864944458, "logps/chosen": -318.903076171875, "logps/rejected": -288.6401062011719, "loss": 0.6616, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.028123896569013596, "rewards/margins": 0.06676146388053894, "rewards/rejected": -0.09488535672426224, "step": 430 }, { "epoch": 0.46, "learning_rate": 4.933612089563714e-07, "logits/chosen": -2.6676137447357178, "logits/rejected": -2.6490044593811035, "logps/chosen": -319.513671875, "logps/rejected": -300.4092712402344, "loss": 0.6587, "rewards/accuracies": 0.59375, "rewards/chosen": -0.04882526397705078, "rewards/margins": 0.0546044185757637, "rewards/rejected": -0.10342969000339508, "step": 440 }, { "epoch": 0.47, "learning_rate": 4.929714675750299e-07, "logits/chosen": -2.5612893104553223, "logits/rejected": -2.5102691650390625, "logps/chosen": -322.2162170410156, "logps/rejected": -295.4486389160156, "loss": 0.6549, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.023067612200975418, "rewards/margins": 0.1121089830994606, "rewards/rejected": -0.13517656922340393, "step": 450 }, { "epoch": 0.48, "learning_rate": 4.925707728352358e-07, "logits/chosen": -2.659719467163086, "logits/rejected": -2.5714974403381348, "logps/chosen": -328.37091064453125, "logps/rejected": -311.16217041015625, "loss": 0.6502, "rewards/accuracies": 0.625, "rewards/chosen": -0.04032892733812332, "rewards/margins": 0.08182945102453232, "rewards/rejected": -0.12215838581323624, "step": 460 }, { "epoch": 0.49, "learning_rate": 4.921591428006456e-07, "logits/chosen": -2.680175304412842, "logits/rejected": -2.578962564468384, "logps/chosen": -373.35870361328125, "logps/rejected": -318.6756896972656, "loss": 0.6436, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.03967723622918129, "rewards/margins": 0.15787221491336823, "rewards/rejected": -0.19754944741725922, "step": 470 }, { "epoch": 0.5, "learning_rate": 4.917365960278877e-07, "logits/chosen": -2.5912580490112305, "logits/rejected": -2.5477182865142822, "logps/chosen": -288.45233154296875, "logps/rejected": -286.4162292480469, "loss": 0.6651, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09010852873325348, "rewards/margins": 0.052035313099622726, "rewards/rejected": -0.14214381575584412, "step": 480 }, { "epoch": 0.51, "learning_rate": 4.913031515657269e-07, "logits/chosen": -2.668935775756836, "logits/rejected": -2.566549777984619, "logps/chosen": -343.8060607910156, "logps/rejected": -311.8091735839844, "loss": 0.6491, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.08070842921733856, "rewards/margins": 0.10430131107568741, "rewards/rejected": -0.18500974774360657, "step": 490 }, { "epoch": 0.52, "learning_rate": 4.908588289542046e-07, "logits/chosen": -2.633600950241089, "logits/rejected": -2.5761375427246094, "logps/chosen": -332.14471435546875, "logps/rejected": -312.8584289550781, "loss": 0.6489, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08002828061580658, "rewards/margins": 0.12527289986610413, "rewards/rejected": -0.2053011953830719, "step": 500 }, { "epoch": 0.52, "eval_logits/chosen": -2.676621913909912, "eval_logits/rejected": -2.599640130996704, "eval_logps/chosen": -359.7169494628906, "eval_logps/rejected": -330.5643615722656, "eval_loss": 0.6502917408943176, "eval_rewards/accuracies": 0.6666666865348816, "eval_rewards/chosen": -0.10357183963060379, "eval_rewards/margins": 0.12311027199029922, "eval_rewards/rejected": -0.22668209671974182, "eval_runtime": 372.9183, "eval_samples_per_second": 5.363, "eval_steps_per_second": 0.169, "step": 500 }, { "epoch": 0.53, "learning_rate": 4.904036482237585e-07, "logits/chosen": -2.6458828449249268, "logits/rejected": -2.524355411529541, "logps/chosen": -375.0708923339844, "logps/rejected": -317.0387878417969, "loss": 0.6448, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09278295934200287, "rewards/margins": 0.160991370677948, "rewards/rejected": -0.2537743151187897, "step": 510 }, { "epoch": 0.54, "learning_rate": 4.899376298943193e-07, "logits/chosen": -2.5954272747039795, "logits/rejected": -2.545722484588623, "logps/chosen": -318.3174133300781, "logps/rejected": -312.9372863769531, "loss": 0.6473, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09085050970315933, "rewards/margins": 0.15199792385101318, "rewards/rejected": -0.2428484410047531, "step": 520 }, { "epoch": 0.55, "learning_rate": 4.894607949743861e-07, "logits/chosen": -2.581209182739258, "logits/rejected": -2.5345587730407715, "logps/chosen": -355.0510559082031, "logps/rejected": -327.82257080078125, "loss": 0.6446, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.11026673018932343, "rewards/margins": 0.12424556910991669, "rewards/rejected": -0.2345122992992401, "step": 530 }, { "epoch": 0.57, "learning_rate": 4.889731649600786e-07, "logits/chosen": -2.6255240440368652, "logits/rejected": -2.5667052268981934, "logps/chosen": -375.1460876464844, "logps/rejected": -374.27880859375, "loss": 0.6423, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.12368792295455933, "rewards/margins": 0.1251518726348877, "rewards/rejected": -0.24883978068828583, "step": 540 }, { "epoch": 0.58, "learning_rate": 4.884747618341686e-07, "logits/chosen": -2.600996971130371, "logits/rejected": -2.514336585998535, "logps/chosen": -343.29559326171875, "logps/rejected": -321.33917236328125, "loss": 0.651, "rewards/accuracies": 0.65625, "rewards/chosen": -0.1610172837972641, "rewards/margins": 0.13775303959846497, "rewards/rejected": -0.29877036809921265, "step": 550 }, { "epoch": 0.59, "learning_rate": 4.879656080650891e-07, "logits/chosen": -2.6180787086486816, "logits/rejected": -2.528000831604004, "logps/chosen": -340.5784606933594, "logps/rejected": -307.54510498046875, "loss": 0.6388, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.15956440567970276, "rewards/margins": 0.16902579367160797, "rewards/rejected": -0.3285902142524719, "step": 560 }, { "epoch": 0.6, "learning_rate": 4.874457266059209e-07, "logits/chosen": -2.612618923187256, "logits/rejected": -2.5180106163024902, "logps/chosen": -358.2880859375, "logps/rejected": -335.1981506347656, "loss": 0.6475, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17318308353424072, "rewards/margins": 0.12875264883041382, "rewards/rejected": -0.30193573236465454, "step": 570 }, { "epoch": 0.61, "learning_rate": 4.869151408933583e-07, "logits/chosen": -2.545635223388672, "logits/rejected": -2.465250253677368, "logps/chosen": -351.6809997558594, "logps/rejected": -309.2189636230469, "loss": 0.642, "rewards/accuracies": 0.625, "rewards/chosen": -0.2154681235551834, "rewards/margins": 0.11899904906749725, "rewards/rejected": -0.3344671428203583, "step": 580 }, { "epoch": 0.62, "learning_rate": 4.863738748466519e-07, "logits/chosen": -2.6205108165740967, "logits/rejected": -2.5699057579040527, "logps/chosen": -340.952392578125, "logps/rejected": -325.1321105957031, "loss": 0.6416, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.12603029608726501, "rewards/margins": 0.10710600763559341, "rewards/rejected": -0.23313629627227783, "step": 590 }, { "epoch": 0.63, "learning_rate": 4.858219528665313e-07, "logits/chosen": -2.610783815383911, "logits/rejected": -2.5357155799865723, "logps/chosen": -409.34844970703125, "logps/rejected": -393.9020080566406, "loss": 0.6442, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.11347125470638275, "rewards/margins": 0.1751668006181717, "rewards/rejected": -0.28863808512687683, "step": 600 }, { "epoch": 0.63, "eval_logits/chosen": -2.6208555698394775, "eval_logits/rejected": -2.541459321975708, "eval_logps/chosen": -364.4345397949219, "eval_logps/rejected": -339.30987548828125, "eval_loss": 0.6407224535942078, "eval_rewards/accuracies": 0.6805555820465088, "eval_rewards/chosen": -0.15074825286865234, "eval_rewards/margins": 0.16338865458965302, "eval_rewards/rejected": -0.31413692235946655, "eval_runtime": 378.0153, "eval_samples_per_second": 5.291, "eval_steps_per_second": 0.167, "step": 600 }, { "epoch": 0.64, "learning_rate": 4.852593998341043e-07, "logits/chosen": -2.625915288925171, "logits/rejected": -2.523160457611084, "logps/chosen": -351.0770568847656, "logps/rejected": -295.78399658203125, "loss": 0.6338, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.14120660722255707, "rewards/margins": 0.1647595465183258, "rewards/rejected": -0.30596619844436646, "step": 610 }, { "epoch": 0.65, "learning_rate": 4.846862411097354e-07, "logits/chosen": -2.6131348609924316, "logits/rejected": -2.516840696334839, "logps/chosen": -360.5911865234375, "logps/rejected": -314.7618103027344, "loss": 0.6325, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21129730343818665, "rewards/margins": 0.1473945826292038, "rewards/rejected": -0.35869190096855164, "step": 620 }, { "epoch": 0.66, "learning_rate": 4.841025025319029e-07, "logits/chosen": -2.4459609985351562, "logits/rejected": -2.3932125568389893, "logps/chosen": -338.36541748046875, "logps/rejected": -334.18218994140625, "loss": 0.6301, "rewards/accuracies": 0.65625, "rewards/chosen": -0.1767289787530899, "rewards/margins": 0.17090250551700592, "rewards/rejected": -0.3476315140724182, "step": 630 }, { "epoch": 0.67, "learning_rate": 4.835082104160337e-07, "logits/chosen": -2.5294649600982666, "logits/rejected": -2.4497077465057373, "logps/chosen": -345.24273681640625, "logps/rejected": -330.613037109375, "loss": 0.6319, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17142672836780548, "rewards/margins": 0.1845071017742157, "rewards/rejected": -0.3559338450431824, "step": 640 }, { "epoch": 0.68, "learning_rate": 4.829033915533171e-07, "logits/chosen": -2.647000312805176, "logits/rejected": -2.5052175521850586, "logps/chosen": -399.4244689941406, "logps/rejected": -361.20965576171875, "loss": 0.622, "rewards/accuracies": 0.65625, "rewards/chosen": -0.17484009265899658, "rewards/margins": 0.23640482127666473, "rewards/rejected": -0.4112449288368225, "step": 650 }, { "epoch": 0.69, "learning_rate": 4.822880732094967e-07, "logits/chosen": -2.6102538108825684, "logits/rejected": -2.5597729682922363, "logps/chosen": -377.75408935546875, "logps/rejected": -357.68804931640625, "loss": 0.6272, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.183266744017601, "rewards/margins": 0.19729962944984436, "rewards/rejected": -0.38056638836860657, "step": 660 }, { "epoch": 0.7, "learning_rate": 4.81662283123642e-07, "logits/chosen": -2.5665385723114014, "logits/rejected": -2.529106855392456, "logps/chosen": -362.9713134765625, "logps/rejected": -356.2767333984375, "loss": 0.6291, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.19505253434181213, "rewards/margins": 0.19748732447624207, "rewards/rejected": -0.3925398290157318, "step": 670 }, { "epoch": 0.71, "learning_rate": 4.810260495068973e-07, "logits/chosen": -2.485835313796997, "logits/rejected": -2.4558098316192627, "logps/chosen": -342.9903564453125, "logps/rejected": -319.09075927734375, "loss": 0.6333, "rewards/accuracies": 0.59375, "rewards/chosen": -0.22894680500030518, "rewards/margins": 0.08809840679168701, "rewards/rejected": -0.3170451819896698, "step": 680 }, { "epoch": 0.72, "learning_rate": 4.8037940104121e-07, "logits/chosen": -2.5049188137054443, "logits/rejected": -2.4172816276550293, "logps/chosen": -346.4105529785156, "logps/rejected": -328.77423095703125, "loss": 0.6336, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2281329184770584, "rewards/margins": 0.15840637683868408, "rewards/rejected": -0.3865392804145813, "step": 690 }, { "epoch": 0.73, "learning_rate": 4.797223668780377e-07, "logits/chosen": -2.5285067558288574, "logits/rejected": -2.4108242988586426, "logps/chosen": -332.65069580078125, "logps/rejected": -327.6502990722656, "loss": 0.6271, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.2695836126804352, "rewards/margins": 0.19101601839065552, "rewards/rejected": -0.4605995714664459, "step": 700 }, { "epoch": 0.73, "eval_logits/chosen": -2.5658488273620605, "eval_logits/rejected": -2.4835686683654785, "eval_logps/chosen": -373.3324279785156, "eval_logps/rejected": -352.50689697265625, "eval_loss": 0.6320837140083313, "eval_rewards/accuracies": 0.6765872836112976, "eval_rewards/chosen": -0.23972678184509277, "eval_rewards/margins": 0.20638057589530945, "eval_rewards/rejected": -0.4461073875427246, "eval_runtime": 360.7065, "eval_samples_per_second": 5.545, "eval_steps_per_second": 0.175, "step": 700 }, { "epoch": 0.74, "learning_rate": 4.79054976637034e-07, "logits/chosen": -2.5603199005126953, "logits/rejected": -2.4289393424987793, "logps/chosen": -392.89959716796875, "logps/rejected": -323.50262451171875, "loss": 0.6176, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.23820409178733826, "rewards/margins": 0.20737656950950623, "rewards/rejected": -0.4455806612968445, "step": 710 }, { "epoch": 0.75, "learning_rate": 4.783772604047133e-07, "logits/chosen": -2.5404629707336426, "logits/rejected": -2.4736697673797607, "logps/chosen": -374.0919189453125, "logps/rejected": -350.94293212890625, "loss": 0.6356, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.23239894211292267, "rewards/margins": 0.18589885532855988, "rewards/rejected": -0.41829776763916016, "step": 720 }, { "epoch": 0.76, "learning_rate": 4.776892487330943e-07, "logits/chosen": -2.53133225440979, "logits/rejected": -2.422051191329956, "logps/chosen": -380.31622314453125, "logps/rejected": -339.6204528808594, "loss": 0.6308, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.18756382167339325, "rewards/margins": 0.2003902941942215, "rewards/rejected": -0.38795414566993713, "step": 730 }, { "epoch": 0.77, "learning_rate": 4.769909726383226e-07, "logits/chosen": -2.5187153816223145, "logits/rejected": -2.3927228450775146, "logps/chosen": -406.51263427734375, "logps/rejected": -342.09661865234375, "loss": 0.6223, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.15098270773887634, "rewards/margins": 0.2193053960800171, "rewards/rejected": -0.37028807401657104, "step": 740 }, { "epoch": 0.78, "learning_rate": 4.762824635992729e-07, "logits/chosen": -2.530505657196045, "logits/rejected": -2.4965600967407227, "logps/chosen": -370.498779296875, "logps/rejected": -370.7012634277344, "loss": 0.6209, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.261788547039032, "rewards/margins": 0.18216492235660553, "rewards/rejected": -0.4439534544944763, "step": 750 }, { "epoch": 0.8, "learning_rate": 4.755637535561297e-07, "logits/chosen": -2.459725856781006, "logits/rejected": -2.4072091579437256, "logps/chosen": -365.5503845214844, "logps/rejected": -354.66497802734375, "loss": 0.6126, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.37326163053512573, "rewards/margins": 0.21484375, "rewards/rejected": -0.5881053805351257, "step": 760 }, { "epoch": 0.81, "learning_rate": 4.7483487490894716e-07, "logits/chosen": -2.4971468448638916, "logits/rejected": -2.4607887268066406, "logps/chosen": -397.69354248046875, "logps/rejected": -411.0802307128906, "loss": 0.6239, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.37995341420173645, "rewards/margins": 0.20637984573841095, "rewards/rejected": -0.5863332748413086, "step": 770 }, { "epoch": 0.82, "learning_rate": 4.7409586051618866e-07, "logits/chosen": -2.418168306350708, "logits/rejected": -2.357445478439331, "logps/chosen": -344.15997314453125, "logps/rejected": -328.99871826171875, "loss": 0.6249, "rewards/accuracies": 0.625, "rewards/chosen": -0.2923930883407593, "rewards/margins": 0.1856629103422165, "rewards/rejected": -0.47805601358413696, "step": 780 }, { "epoch": 0.83, "learning_rate": 4.733467436932458e-07, "logits/chosen": -2.507992744445801, "logits/rejected": -2.4629783630371094, "logps/chosen": -393.92144775390625, "logps/rejected": -387.3020935058594, "loss": 0.6342, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2580786347389221, "rewards/margins": 0.2315601110458374, "rewards/rejected": -0.4896388053894043, "step": 790 }, { "epoch": 0.84, "learning_rate": 4.7258755821093583e-07, "logits/chosen": -2.453043222427368, "logits/rejected": -2.361077070236206, "logps/chosen": -436.2608947753906, "logps/rejected": -373.888427734375, "loss": 0.607, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.27933627367019653, "rewards/margins": 0.22823591530323029, "rewards/rejected": -0.5075721740722656, "step": 800 }, { "epoch": 0.84, "eval_logits/chosen": -2.505126953125, "eval_logits/rejected": -2.4199066162109375, "eval_logps/chosen": -379.1497497558594, "eval_logps/rejected": -361.6934509277344, "eval_loss": 0.6261005401611328, "eval_rewards/accuracies": 0.6845238208770752, "eval_rewards/chosen": -0.29789987206459045, "eval_rewards/margins": 0.24007315933704376, "eval_rewards/rejected": -0.5379729866981506, "eval_runtime": 373.2963, "eval_samples_per_second": 5.358, "eval_steps_per_second": 0.169, "step": 800 }, { "epoch": 0.85, "learning_rate": 4.7181833829398005e-07, "logits/chosen": -2.4596476554870605, "logits/rejected": -2.324451446533203, "logps/chosen": -374.5833435058594, "logps/rejected": -325.2947692871094, "loss": 0.6294, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.30218952894210815, "rewards/margins": 0.1286691129207611, "rewards/rejected": -0.4308586120605469, "step": 810 }, { "epoch": 0.86, "learning_rate": 4.7103911861946033e-07, "logits/chosen": -2.3883352279663086, "logits/rejected": -2.3170790672302246, "logps/chosen": -323.11480712890625, "logps/rejected": -317.70635986328125, "loss": 0.6392, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2386181354522705, "rewards/margins": 0.19333642721176147, "rewards/rejected": -0.431954562664032, "step": 820 }, { "epoch": 0.87, "learning_rate": 4.70249934315256e-07, "logits/chosen": -2.4071390628814697, "logits/rejected": -2.3966832160949707, "logps/chosen": -331.5174865722656, "logps/rejected": -332.76898193359375, "loss": 0.6164, "rewards/accuracies": 0.6875, "rewards/chosen": -0.27694451808929443, "rewards/margins": 0.23727154731750488, "rewards/rejected": -0.5142160654067993, "step": 830 }, { "epoch": 0.88, "learning_rate": 4.6945082095846047e-07, "logits/chosen": -2.4078102111816406, "logits/rejected": -2.356518030166626, "logps/chosen": -403.46954345703125, "logps/rejected": -375.2245178222656, "loss": 0.6229, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3784494996070862, "rewards/margins": 0.12926678359508514, "rewards/rejected": -0.5077162981033325, "step": 840 }, { "epoch": 0.89, "learning_rate": 4.6864181457377695e-07, "logits/chosen": -2.4845261573791504, "logits/rejected": -2.4015376567840576, "logps/chosen": -403.55596923828125, "logps/rejected": -351.72515869140625, "loss": 0.622, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3145061135292053, "rewards/margins": 0.266304612159729, "rewards/rejected": -0.5808106660842896, "step": 850 }, { "epoch": 0.9, "learning_rate": 4.678229516318948e-07, "logits/chosen": -2.483037233352661, "logits/rejected": -2.425265073776245, "logps/chosen": -379.2450866699219, "logps/rejected": -362.83404541015625, "loss": 0.6281, "rewards/accuracies": 0.65625, "rewards/chosen": -0.36860939860343933, "rewards/margins": 0.25606662034988403, "rewards/rejected": -0.6246760487556458, "step": 860 }, { "epoch": 0.91, "learning_rate": 4.6699426904784545e-07, "logits/chosen": -2.4010143280029297, "logits/rejected": -2.367020845413208, "logps/chosen": -338.23992919921875, "logps/rejected": -354.3396301269531, "loss": 0.6117, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.3722101151943207, "rewards/margins": 0.2783013582229614, "rewards/rejected": -0.6505114436149597, "step": 870 }, { "epoch": 0.92, "learning_rate": 4.6615580417933785e-07, "logits/chosen": -2.414269208908081, "logits/rejected": -2.3180108070373535, "logps/chosen": -386.2663269042969, "logps/rejected": -361.62493896484375, "loss": 0.6215, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.43918028473854065, "rewards/margins": 0.2183331698179245, "rewards/rejected": -0.6575134992599487, "step": 880 }, { "epoch": 0.93, "learning_rate": 4.6530759482507466e-07, "logits/chosen": -2.4086251258850098, "logits/rejected": -2.359178066253662, "logps/chosen": -376.55157470703125, "logps/rejected": -377.1549377441406, "loss": 0.6339, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.44110578298568726, "rewards/margins": 0.16434124112129211, "rewards/rejected": -0.605446994304657, "step": 890 }, { "epoch": 0.94, "learning_rate": 4.6444967922304813e-07, "logits/chosen": -2.3653807640075684, "logits/rejected": -2.2835304737091064, "logps/chosen": -407.1882019042969, "logps/rejected": -394.25579833984375, "loss": 0.6322, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.44054508209228516, "rewards/margins": 0.16737648844718933, "rewards/rejected": -0.6079215407371521, "step": 900 }, { "epoch": 0.94, "eval_logits/chosen": -2.4507651329040527, "eval_logits/rejected": -2.364361047744751, "eval_logps/chosen": -397.4640808105469, "eval_logps/rejected": -382.21417236328125, "eval_loss": 0.6199224591255188, "eval_rewards/accuracies": 0.6904761791229248, "eval_rewards/chosen": -0.48104292154312134, "eval_rewards/margins": 0.26213717460632324, "eval_rewards/rejected": -0.7431801557540894, "eval_runtime": 387.4727, "eval_samples_per_second": 5.162, "eval_steps_per_second": 0.163, "step": 900 }, { "epoch": 0.95, "learning_rate": 4.6358209604881637e-07, "logits/chosen": -2.3927271366119385, "logits/rejected": -2.3169281482696533, "logps/chosen": -358.5960388183594, "logps/rejected": -359.0953674316406, "loss": 0.6075, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5474358797073364, "rewards/margins": 0.20302316546440125, "rewards/rejected": -0.7504590153694153, "step": 910 }, { "epoch": 0.96, "learning_rate": 4.627048844137598e-07, "logits/chosen": -2.4270455837249756, "logits/rejected": -2.3073556423187256, "logps/chosen": -401.132080078125, "logps/rejected": -383.88067626953125, "loss": 0.6136, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.5267983675003052, "rewards/margins": 0.2972319722175598, "rewards/rejected": -0.8240302801132202, "step": 920 }, { "epoch": 0.97, "learning_rate": 4.6181808386331787e-07, "logits/chosen": -2.4496045112609863, "logits/rejected": -2.3281662464141846, "logps/chosen": -384.0579833984375, "logps/rejected": -371.2091369628906, "loss": 0.5891, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.47724461555480957, "rewards/margins": 0.333379864692688, "rewards/rejected": -0.8106244802474976, "step": 930 }, { "epoch": 0.98, "learning_rate": 4.6092173437520666e-07, "logits/chosen": -2.423539638519287, "logits/rejected": -2.3008933067321777, "logps/chosen": -444.1749572753906, "logps/rejected": -432.6753845214844, "loss": 0.6111, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.5619600415229797, "rewards/margins": 0.33264535665512085, "rewards/rejected": -0.8946054577827454, "step": 940 }, { "epoch": 0.99, "learning_rate": 4.600158763576161e-07, "logits/chosen": -2.438096523284912, "logits/rejected": -2.3391449451446533, "logps/chosen": -401.29083251953125, "logps/rejected": -386.20361328125, "loss": 0.6197, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.5298935174942017, "rewards/margins": 0.28705304861068726, "rewards/rejected": -0.8169466257095337, "step": 950 }, { "epoch": 1.0, "learning_rate": 4.591005506473887e-07, "logits/chosen": -2.3625149726867676, "logits/rejected": -2.2783892154693604, "logps/chosen": -371.94158935546875, "logps/rejected": -384.6352233886719, "loss": 0.6026, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.42412814497947693, "rewards/margins": 0.35006412863731384, "rewards/rejected": -0.774192214012146, "step": 960 }, { "epoch": 1.02, "learning_rate": 4.5817579850817884e-07, "logits/chosen": -2.3949708938598633, "logits/rejected": -2.3096823692321777, "logps/chosen": -418.3802795410156, "logps/rejected": -408.92279052734375, "loss": 0.5971, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.4029483199119568, "rewards/margins": 0.32788950204849243, "rewards/rejected": -0.730837881565094, "step": 970 }, { "epoch": 1.03, "learning_rate": 4.572416616285918e-07, "logits/chosen": -2.2977919578552246, "logits/rejected": -2.2567481994628906, "logps/chosen": -355.2760314941406, "logps/rejected": -390.1808776855469, "loss": 0.5833, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.42498597502708435, "rewards/margins": 0.4129951596260071, "rewards/rejected": -0.837981104850769, "step": 980 }, { "epoch": 1.04, "learning_rate": 4.5629818212030525e-07, "logits/chosen": -2.3631339073181152, "logits/rejected": -2.265576124191284, "logps/chosen": -423.474365234375, "logps/rejected": -398.92816162109375, "loss": 0.604, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.4605909287929535, "rewards/margins": 0.31168457865715027, "rewards/rejected": -0.7722755670547485, "step": 990 }, { "epoch": 1.05, "learning_rate": 4.5534540251617013e-07, "logits/chosen": -2.3864855766296387, "logits/rejected": -2.369788408279419, "logps/chosen": -378.5528564453125, "logps/rejected": -388.29364013671875, "loss": 0.605, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.5324582457542419, "rewards/margins": 0.23893216252326965, "rewards/rejected": -0.7713904976844788, "step": 1000 }, { "epoch": 1.05, "eval_logits/chosen": -2.3963613510131836, "eval_logits/rejected": -2.3067517280578613, "eval_logps/chosen": -404.5889892578125, "eval_logps/rejected": -394.02880859375, "eval_loss": 0.6115422248840332, "eval_rewards/accuracies": 0.6884920597076416, "eval_rewards/chosen": -0.5522919297218323, "eval_rewards/margins": 0.30903440713882446, "eval_rewards/rejected": -0.8613263368606567, "eval_runtime": 345.5833, "eval_samples_per_second": 5.787, "eval_steps_per_second": 0.182, "step": 1000 }, { "epoch": 1.06, "learning_rate": 4.5438336576829377e-07, "logits/chosen": -2.3662519454956055, "logits/rejected": -2.2876665592193604, "logps/chosen": -418.6935119628906, "logps/rejected": -390.25067138671875, "loss": 0.596, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.5613728761672974, "rewards/margins": 0.25794973969459534, "rewards/rejected": -0.8193224668502808, "step": 1010 }, { "epoch": 1.07, "learning_rate": 4.5341211524610323e-07, "logits/chosen": -2.353506565093994, "logits/rejected": -2.3161935806274414, "logps/chosen": -407.1302795410156, "logps/rejected": -410.1849670410156, "loss": 0.5985, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.5053799748420715, "rewards/margins": 0.3071553409099579, "rewards/rejected": -0.8125354051589966, "step": 1020 }, { "epoch": 1.08, "learning_rate": 4.5243169473439026e-07, "logits/chosen": -2.2898788452148438, "logits/rejected": -2.24770188331604, "logps/chosen": -371.4761962890625, "logps/rejected": -374.82989501953125, "loss": 0.5841, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.4240152835845947, "rewards/margins": 0.37721922993659973, "rewards/rejected": -0.8012345433235168, "step": 1030 }, { "epoch": 1.09, "learning_rate": 4.5144214843133753e-07, "logits/chosen": -2.280208110809326, "logits/rejected": -2.2782938480377197, "logps/chosen": -369.32598876953125, "logps/rejected": -416.7386779785156, "loss": 0.6018, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.4348181188106537, "rewards/margins": 0.3252604603767395, "rewards/rejected": -0.7600786089897156, "step": 1040 }, { "epoch": 1.1, "learning_rate": 4.5044352094652603e-07, "logits/chosen": -2.3721535205841064, "logits/rejected": -2.2657477855682373, "logps/chosen": -398.3066101074219, "logps/rejected": -372.4281921386719, "loss": 0.5902, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.4137346148490906, "rewards/margins": 0.3253127634525299, "rewards/rejected": -0.7390474081039429, "step": 1050 }, { "epoch": 1.11, "learning_rate": 4.494358572989241e-07, "logits/chosen": -2.3646328449249268, "logits/rejected": -2.1730899810791016, "logps/chosen": -439.25579833984375, "logps/rejected": -406.27655029296875, "loss": 0.5674, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.4350808262825012, "rewards/margins": 0.45484787225723267, "rewards/rejected": -0.8899286389350891, "step": 1060 }, { "epoch": 1.12, "learning_rate": 4.484192029148578e-07, "logits/chosen": -2.313396692276001, "logits/rejected": -2.204408645629883, "logps/chosen": -376.7505798339844, "logps/rejected": -346.17791748046875, "loss": 0.5977, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.5117573738098145, "rewards/margins": 0.32838425040245056, "rewards/rejected": -0.8401415944099426, "step": 1070 }, { "epoch": 1.13, "learning_rate": 4.4739360362596336e-07, "logits/chosen": -2.273745059967041, "logits/rejected": -2.2262158393859863, "logps/chosen": -369.76641845703125, "logps/rejected": -395.5487976074219, "loss": 0.5952, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.546228289604187, "rewards/margins": 0.31487131118774414, "rewards/rejected": -0.8610996007919312, "step": 1080 }, { "epoch": 1.14, "learning_rate": 4.4635910566712073e-07, "logits/chosen": -2.3198351860046387, "logits/rejected": -2.2201361656188965, "logps/chosen": -424.00286865234375, "logps/rejected": -421.1241760253906, "loss": 0.574, "rewards/accuracies": 0.71875, "rewards/chosen": -0.595988929271698, "rewards/margins": 0.38041016459465027, "rewards/rejected": -0.9763991236686707, "step": 1090 }, { "epoch": 1.15, "learning_rate": 4.4531575567436933e-07, "logits/chosen": -2.3476712703704834, "logits/rejected": -2.268463134765625, "logps/chosen": -410.62371826171875, "logps/rejected": -415.69256591796875, "loss": 0.601, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.7680894136428833, "rewards/margins": 0.27382129430770874, "rewards/rejected": -1.0419107675552368, "step": 1100 }, { "epoch": 1.15, "eval_logits/chosen": -2.3601648807525635, "eval_logits/rejected": -2.2683041095733643, "eval_logps/chosen": -418.7676696777344, "eval_logps/rejected": -411.0064697265625, "eval_loss": 0.6067742705345154, "eval_rewards/accuracies": 0.6964285969734192, "eval_rewards/chosen": -0.6940793991088867, "eval_rewards/margins": 0.33702388405799866, "eval_rewards/rejected": -1.031103253364563, "eval_runtime": 356.6096, "eval_samples_per_second": 5.608, "eval_steps_per_second": 0.177, "step": 1100 }, { "epoch": 1.16, "learning_rate": 4.44263600682806e-07, "logits/chosen": -2.357461452484131, "logits/rejected": -2.2750308513641357, "logps/chosen": -418.9007263183594, "logps/rejected": -407.72772216796875, "loss": 0.5931, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6749259233474731, "rewards/margins": 0.2714986801147461, "rewards/rejected": -0.9464246034622192, "step": 1110 }, { "epoch": 1.17, "learning_rate": 4.4320268812446404e-07, "logits/chosen": -2.371415615081787, "logits/rejected": -2.2759017944335938, "logps/chosen": -417.7850036621094, "logps/rejected": -398.28692626953125, "loss": 0.5898, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5170518159866333, "rewards/margins": 0.35224297642707825, "rewards/rejected": -0.8692947626113892, "step": 1120 }, { "epoch": 1.18, "learning_rate": 4.421330658261754e-07, "logits/chosen": -2.32688570022583, "logits/rejected": -2.2558743953704834, "logps/chosen": -387.0340270996094, "logps/rejected": -385.77984619140625, "loss": 0.5755, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.3961396813392639, "rewards/margins": 0.3396463990211487, "rewards/rejected": -0.7357860803604126, "step": 1130 }, { "epoch": 1.19, "learning_rate": 4.410547820074143e-07, "logits/chosen": -2.3766913414001465, "logits/rejected": -2.2579758167266846, "logps/chosen": -411.9817810058594, "logps/rejected": -376.52716064453125, "loss": 0.5798, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.43211793899536133, "rewards/margins": 0.41633152961730957, "rewards/rejected": -0.8484494090080261, "step": 1140 }, { "epoch": 1.2, "learning_rate": 4.399678852781238e-07, "logits/chosen": -2.342559337615967, "logits/rejected": -2.266874074935913, "logps/chosen": -410.984619140625, "logps/rejected": -401.4251403808594, "loss": 0.5879, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.5716805458068848, "rewards/margins": 0.32295480370521545, "rewards/rejected": -0.8946353197097778, "step": 1150 }, { "epoch": 1.21, "learning_rate": 4.3887242463652415e-07, "logits/chosen": -2.3485589027404785, "logits/rejected": -2.269087791442871, "logps/chosen": -400.2742004394531, "logps/rejected": -413.7886657714844, "loss": 0.5823, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5351490378379822, "rewards/margins": 0.39718011021614075, "rewards/rejected": -0.9323290586471558, "step": 1160 }, { "epoch": 1.22, "learning_rate": 4.3776844946690385e-07, "logits/chosen": -2.3736624717712402, "logits/rejected": -2.2624993324279785, "logps/chosen": -424.0856018066406, "logps/rejected": -380.76812744140625, "loss": 0.5792, "rewards/accuracies": 0.6875, "rewards/chosen": -0.48149457573890686, "rewards/margins": 0.3061677813529968, "rewards/rejected": -0.7876623868942261, "step": 1170 }, { "epoch": 1.23, "learning_rate": 4.3665600953739367e-07, "logits/chosen": -2.313255548477173, "logits/rejected": -2.192188024520874, "logps/chosen": -404.3397216796875, "logps/rejected": -371.1601257324219, "loss": 0.5742, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.491854190826416, "rewards/margins": 0.3715011477470398, "rewards/rejected": -0.8633554577827454, "step": 1180 }, { "epoch": 1.25, "learning_rate": 4.3553515499772285e-07, "logits/chosen": -2.393124580383301, "logits/rejected": -2.2997257709503174, "logps/chosen": -403.5997619628906, "logps/rejected": -397.8185729980469, "loss": 0.5659, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5112482309341431, "rewards/margins": 0.4146638512611389, "rewards/rejected": -0.925912082195282, "step": 1190 }, { "epoch": 1.26, "learning_rate": 4.344059363769583e-07, "logits/chosen": -2.329709529876709, "logits/rejected": -2.240239381790161, "logps/chosen": -423.2294006347656, "logps/rejected": -421.49468994140625, "loss": 0.5676, "rewards/accuracies": 0.71875, "rewards/chosen": -0.5605155229568481, "rewards/margins": 0.39043912291526794, "rewards/rejected": -0.9509545564651489, "step": 1200 }, { "epoch": 1.26, "eval_logits/chosen": -2.3216235637664795, "eval_logits/rejected": -2.2290165424346924, "eval_logps/chosen": -417.0859375, "eval_logps/rejected": -411.97637939453125, "eval_loss": 0.6020426154136658, "eval_rewards/accuracies": 0.7123016119003296, "eval_rewards/chosen": -0.677262008190155, "eval_rewards/margins": 0.36354002356529236, "eval_rewards/rejected": -1.040802001953125, "eval_runtime": 368.9191, "eval_samples_per_second": 5.421, "eval_steps_per_second": 0.171, "step": 1200 }, { "epoch": 1.27, "learning_rate": 4.332684045812268e-07, "logits/chosen": -2.3038039207458496, "logits/rejected": -2.197749614715576, "logps/chosen": -371.4241943359375, "logps/rejected": -387.53070068359375, "loss": 0.5788, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5937258005142212, "rewards/margins": 0.3397255539894104, "rewards/rejected": -0.9334513545036316, "step": 1210 }, { "epoch": 1.28, "learning_rate": 4.3212261089142e-07, "logits/chosen": -2.328768253326416, "logits/rejected": -2.1700007915496826, "logps/chosen": -417.7594299316406, "logps/rejected": -393.429443359375, "loss": 0.59, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.4774077832698822, "rewards/margins": 0.405425488948822, "rewards/rejected": -0.8828333020210266, "step": 1220 }, { "epoch": 1.29, "learning_rate": 4.3096860696088267e-07, "logits/chosen": -2.322392463684082, "logits/rejected": -2.1980550289154053, "logps/chosen": -430.95068359375, "logps/rejected": -409.2894592285156, "loss": 0.5845, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.521595299243927, "rewards/margins": 0.3559170365333557, "rewards/rejected": -0.8775123357772827, "step": 1230 }, { "epoch": 1.3, "learning_rate": 4.2980644481308426e-07, "logits/chosen": -2.23865008354187, "logits/rejected": -2.2324957847595215, "logps/chosen": -385.8111877441406, "logps/rejected": -396.346435546875, "loss": 0.6033, "rewards/accuracies": 0.59375, "rewards/chosen": -0.6240901947021484, "rewards/margins": 0.3070584237575531, "rewards/rejected": -0.9311486482620239, "step": 1240 }, { "epoch": 1.31, "learning_rate": 4.286361768392734e-07, "logits/chosen": -2.2613332271575928, "logits/rejected": -2.18135404586792, "logps/chosen": -415.98651123046875, "logps/rejected": -406.7262268066406, "loss": 0.5709, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.6372745633125305, "rewards/margins": 0.37148743867874146, "rewards/rejected": -1.008762001991272, "step": 1250 }, { "epoch": 1.32, "learning_rate": 4.2745785579611636e-07, "logits/chosen": -2.216391086578369, "logits/rejected": -2.1812686920166016, "logps/chosen": -363.19464111328125, "logps/rejected": -382.83489990234375, "loss": 0.5881, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.6171839237213135, "rewards/margins": 0.3068179488182068, "rewards/rejected": -0.9240018725395203, "step": 1260 }, { "epoch": 1.33, "learning_rate": 4.262715348033184e-07, "logits/chosen": -2.2606654167175293, "logits/rejected": -2.183107852935791, "logps/chosen": -382.5652770996094, "logps/rejected": -387.09539794921875, "loss": 0.5615, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.4672677516937256, "rewards/margins": 0.40395697951316833, "rewards/rejected": -0.871224582195282, "step": 1270 }, { "epoch": 1.34, "learning_rate": 4.2507726734122927e-07, "logits/chosen": -2.3232672214508057, "logits/rejected": -2.2005207538604736, "logps/chosen": -399.9627685546875, "logps/rejected": -384.1025390625, "loss": 0.5709, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.4604893624782562, "rewards/margins": 0.41125577688217163, "rewards/rejected": -0.871745228767395, "step": 1280 }, { "epoch": 1.35, "learning_rate": 4.2387510724843243e-07, "logits/chosen": -2.278716564178467, "logits/rejected": -2.1945688724517822, "logps/chosen": -405.977783203125, "logps/rejected": -400.54888916015625, "loss": 0.5861, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.5321739912033081, "rewards/margins": 0.38776397705078125, "rewards/rejected": -0.9199379682540894, "step": 1290 }, { "epoch": 1.36, "learning_rate": 4.226651087193175e-07, "logits/chosen": -2.2307355403900146, "logits/rejected": -2.2070822715759277, "logps/chosen": -383.90289306640625, "logps/rejected": -393.69342041015625, "loss": 0.5909, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6051900386810303, "rewards/margins": 0.3618764281272888, "rewards/rejected": -0.9670664668083191, "step": 1300 }, { "epoch": 1.36, "eval_logits/chosen": -2.29123854637146, "eval_logits/rejected": -2.1982269287109375, "eval_logps/chosen": -412.9469909667969, "eval_logps/rejected": -408.3128356933594, "eval_loss": 0.5999146699905396, "eval_rewards/accuracies": 0.7123016119003296, "eval_rewards/chosen": -0.6358725428581238, "eval_rewards/margins": 0.368294358253479, "eval_rewards/rejected": -1.0041669607162476, "eval_runtime": 359.7432, "eval_samples_per_second": 5.56, "eval_steps_per_second": 0.175, "step": 1300 }, { "epoch": 1.37, "learning_rate": 4.214473263016376e-07, "logits/chosen": -2.2382800579071045, "logits/rejected": -2.144857883453369, "logps/chosen": -382.93292236328125, "logps/rejected": -396.829345703125, "loss": 0.5854, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.5666841268539429, "rewards/margins": 0.36565086245536804, "rewards/rejected": -0.9323350191116333, "step": 1310 }, { "epoch": 1.38, "learning_rate": 4.2022181489405005e-07, "logits/chosen": -2.2324352264404297, "logits/rejected": -2.1366093158721924, "logps/chosen": -384.2945251464844, "logps/rejected": -413.99041748046875, "loss": 0.5728, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.6076509952545166, "rewards/margins": 0.41816553473472595, "rewards/rejected": -1.025816559791565, "step": 1320 }, { "epoch": 1.39, "learning_rate": 4.189886297436416e-07, "logits/chosen": -2.208909511566162, "logits/rejected": -2.137064218521118, "logps/chosen": -418.9502868652344, "logps/rejected": -436.11090087890625, "loss": 0.5882, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6793769001960754, "rewards/margins": 0.40103235840797424, "rewards/rejected": -1.080409288406372, "step": 1330 }, { "epoch": 1.4, "learning_rate": 4.177478264434375e-07, "logits/chosen": -2.2093963623046875, "logits/rejected": -2.14264178276062, "logps/chosen": -392.21478271484375, "logps/rejected": -403.21063232421875, "loss": 0.6091, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.6416140794754028, "rewards/margins": 0.3692830204963684, "rewards/rejected": -1.0108970403671265, "step": 1340 }, { "epoch": 1.41, "learning_rate": 4.164994609298962e-07, "logits/chosen": -2.1711971759796143, "logits/rejected": -2.1330151557922363, "logps/chosen": -351.8862609863281, "logps/rejected": -370.3506774902344, "loss": 0.5698, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.5294076204299927, "rewards/margins": 0.3314458429813385, "rewards/rejected": -0.860853374004364, "step": 1350 }, { "epoch": 1.42, "learning_rate": 4.1524358948038664e-07, "logits/chosen": -2.250774383544922, "logits/rejected": -2.1088974475860596, "logps/chosen": -424.53668212890625, "logps/rejected": -390.82122802734375, "loss": 0.5885, "rewards/accuracies": 0.65625, "rewards/chosen": -0.5655269026756287, "rewards/margins": 0.29255813360214233, "rewards/rejected": -0.858085036277771, "step": 1360 }, { "epoch": 1.43, "learning_rate": 4.139802687106516e-07, "logits/chosen": -2.3377394676208496, "logits/rejected": -2.1961159706115723, "logps/chosen": -416.22869873046875, "logps/rejected": -389.2613220214844, "loss": 0.5645, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.5650314092636108, "rewards/margins": 0.42392808198928833, "rewards/rejected": -0.9889594912528992, "step": 1370 }, { "epoch": 1.44, "learning_rate": 4.1270955557225596e-07, "logits/chosen": -2.244158983230591, "logits/rejected": -2.1470203399658203, "logps/chosen": -397.0245056152344, "logps/rejected": -449.65704345703125, "loss": 0.5523, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.5677224397659302, "rewards/margins": 0.4837714731693268, "rewards/rejected": -1.0514938831329346, "step": 1380 }, { "epoch": 1.45, "learning_rate": 4.1143150735001835e-07, "logits/chosen": -2.212290048599243, "logits/rejected": -2.181854009628296, "logps/chosen": -401.3909606933594, "logps/rejected": -396.84222412109375, "loss": 0.579, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5991008877754211, "rewards/margins": 0.39938944578170776, "rewards/rejected": -0.9984903335571289, "step": 1390 }, { "epoch": 1.47, "learning_rate": 4.1014618165942936e-07, "logits/chosen": -2.2670254707336426, "logits/rejected": -2.1317477226257324, "logps/chosen": -434.9769592285156, "logps/rejected": -407.5884704589844, "loss": 0.5711, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.6577389240264893, "rewards/margins": 0.49383634328842163, "rewards/rejected": -1.1515752077102661, "step": 1400 }, { "epoch": 1.47, "eval_logits/chosen": -2.2460079193115234, "eval_logits/rejected": -2.1507139205932617, "eval_logps/chosen": -420.5697326660156, "eval_logps/rejected": -419.07220458984375, "eval_loss": 0.5966773629188538, "eval_rewards/accuracies": 0.7182539701461792, "eval_rewards/chosen": -0.7120997905731201, "eval_rewards/margins": 0.39966049790382385, "eval_rewards/rejected": -1.1117603778839111, "eval_runtime": 352.9373, "eval_samples_per_second": 5.667, "eval_steps_per_second": 0.179, "step": 1400 }, { "epoch": 1.48, "learning_rate": 4.088536364440541e-07, "logits/chosen": -2.219907283782959, "logits/rejected": -2.084876537322998, "logps/chosen": -438.87921142578125, "logps/rejected": -417.52349853515625, "loss": 0.5658, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.705518364906311, "rewards/margins": 0.49317169189453125, "rewards/rejected": -1.1986901760101318, "step": 1410 }, { "epoch": 1.49, "learning_rate": 4.075539299729196e-07, "logits/chosen": -2.19868803024292, "logits/rejected": -2.1398653984069824, "logps/chosen": -422.9442443847656, "logps/rejected": -429.850830078125, "loss": 0.5771, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.6790863275527954, "rewards/margins": 0.3902955949306488, "rewards/rejected": -1.0693819522857666, "step": 1420 }, { "epoch": 1.5, "learning_rate": 4.062471208378886e-07, "logits/chosen": -2.1475436687469482, "logits/rejected": -2.0641520023345947, "logps/chosen": -410.59124755859375, "logps/rejected": -400.1944885253906, "loss": 0.5804, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.6431189775466919, "rewards/margins": 0.3516607880592346, "rewards/rejected": -0.9947795867919922, "step": 1430 }, { "epoch": 1.51, "learning_rate": 4.049332679510178e-07, "logits/chosen": -2.243961811065674, "logits/rejected": -2.1046929359436035, "logps/chosen": -425.0006408691406, "logps/rejected": -418.7171936035156, "loss": 0.569, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.53559809923172, "rewards/margins": 0.44754093885421753, "rewards/rejected": -0.983138918876648, "step": 1440 }, { "epoch": 1.52, "learning_rate": 4.036124305419024e-07, "logits/chosen": -2.165278911590576, "logits/rejected": -2.0803096294403076, "logps/chosen": -406.1650695800781, "logps/rejected": -402.72027587890625, "loss": 0.5734, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6142688989639282, "rewards/margins": 0.37497222423553467, "rewards/rejected": -0.9892411231994629, "step": 1450 }, { "epoch": 1.53, "learning_rate": 4.0228466815500535e-07, "logits/chosen": -2.2216262817382812, "logits/rejected": -2.0967283248901367, "logps/chosen": -418.2197265625, "logps/rejected": -391.4878234863281, "loss": 0.5574, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.5702084302902222, "rewards/margins": 0.42537397146224976, "rewards/rejected": -0.9955822825431824, "step": 1460 }, { "epoch": 1.54, "learning_rate": 4.009500406469737e-07, "logits/chosen": -2.242321729660034, "logits/rejected": -2.18538761138916, "logps/chosen": -413.0029296875, "logps/rejected": -424.19427490234375, "loss": 0.5851, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.619260847568512, "rewards/margins": 0.3288739323616028, "rewards/rejected": -0.9481347799301147, "step": 1470 }, { "epoch": 1.55, "learning_rate": 3.996086081839399e-07, "logits/chosen": -2.2441189289093018, "logits/rejected": -2.1407032012939453, "logps/chosen": -440.66827392578125, "logps/rejected": -419.3600158691406, "loss": 0.5616, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.5705949664115906, "rewards/margins": 0.4493609368801117, "rewards/rejected": -1.0199559926986694, "step": 1480 }, { "epoch": 1.56, "learning_rate": 3.982604312388096e-07, "logits/chosen": -2.1618101596832275, "logits/rejected": -2.077331066131592, "logps/chosen": -406.7264099121094, "logps/rejected": -422.53631591796875, "loss": 0.5731, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.5052849054336548, "rewards/margins": 0.5251529216766357, "rewards/rejected": -1.030437707901001, "step": 1490 }, { "epoch": 1.57, "learning_rate": 3.969055705885351e-07, "logits/chosen": -2.152574300765991, "logits/rejected": -2.0879902839660645, "logps/chosen": -394.4706726074219, "logps/rejected": -436.9656677246094, "loss": 0.5655, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.6157764792442322, "rewards/margins": 0.4567118287086487, "rewards/rejected": -1.0724884271621704, "step": 1500 }, { "epoch": 1.57, "eval_logits/chosen": -2.2211546897888184, "eval_logits/rejected": -2.1252570152282715, "eval_logps/chosen": -412.4960632324219, "eval_logps/rejected": -410.0142517089844, "eval_loss": 0.5956543684005737, "eval_rewards/accuracies": 0.7222222089767456, "eval_rewards/chosen": -0.6313630938529968, "eval_rewards/margins": 0.3898184597492218, "eval_rewards/rejected": -1.0211814641952515, "eval_runtime": 365.7512, "eval_samples_per_second": 5.468, "eval_steps_per_second": 0.172, "step": 1500 }, { "epoch": 1.58, "learning_rate": 3.9554408731137604e-07, "logits/chosen": -2.1627113819122314, "logits/rejected": -2.1003527641296387, "logps/chosen": -392.1766662597656, "logps/rejected": -400.1446228027344, "loss": 0.5686, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6373321413993835, "rewards/margins": 0.4628276824951172, "rewards/rejected": -1.100159764289856, "step": 1510 }, { "epoch": 1.59, "learning_rate": 3.9417604278414556e-07, "logits/chosen": -2.209413766860962, "logits/rejected": -2.105988025665283, "logps/chosen": -438.8935546875, "logps/rejected": -427.12225341796875, "loss": 0.5667, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.7425927519798279, "rewards/margins": 0.4536716043949127, "rewards/rejected": -1.1962645053863525, "step": 1520 }, { "epoch": 1.6, "learning_rate": 3.9280149867944335e-07, "logits/chosen": -2.132628917694092, "logits/rejected": -2.042515754699707, "logps/chosen": -395.2568664550781, "logps/rejected": -395.38031005859375, "loss": 0.5581, "rewards/accuracies": 0.71875, "rewards/chosen": -0.6433058977127075, "rewards/margins": 0.44076013565063477, "rewards/rejected": -1.0840660333633423, "step": 1530 }, { "epoch": 1.61, "learning_rate": 3.9142051696287583e-07, "logits/chosen": -2.2434608936309814, "logits/rejected": -2.1431119441986084, "logps/chosen": -451.15728759765625, "logps/rejected": -441.5335388183594, "loss": 0.5773, "rewards/accuracies": 0.75, "rewards/chosen": -0.6491286158561707, "rewards/margins": 0.4809587001800537, "rewards/rejected": -1.1300873756408691, "step": 1540 }, { "epoch": 1.62, "learning_rate": 3.900331598902621e-07, "logits/chosen": -2.1666946411132812, "logits/rejected": -2.090304374694824, "logps/chosen": -425.1583557128906, "logps/rejected": -407.2892761230469, "loss": 0.5567, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.641929566860199, "rewards/margins": 0.35586634278297424, "rewards/rejected": -0.9977958798408508, "step": 1550 }, { "epoch": 1.63, "learning_rate": 3.8863949000482774e-07, "logits/chosen": -2.1718239784240723, "logits/rejected": -2.112691879272461, "logps/chosen": -367.64971923828125, "logps/rejected": -395.51373291015625, "loss": 0.5745, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.5719778537750244, "rewards/margins": 0.37684187293052673, "rewards/rejected": -0.9488197565078735, "step": 1560 }, { "epoch": 1.64, "learning_rate": 3.872395701343854e-07, "logits/chosen": -2.142659902572632, "logits/rejected": -2.0468955039978027, "logps/chosen": -432.7139587402344, "logps/rejected": -418.50079345703125, "loss": 0.5754, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.6141091585159302, "rewards/margins": 0.381761372089386, "rewards/rejected": -0.9958705902099609, "step": 1570 }, { "epoch": 1.65, "learning_rate": 3.8583346338850217e-07, "logits/chosen": -2.1769824028015137, "logits/rejected": -2.167893886566162, "logps/chosen": -383.44659423828125, "logps/rejected": -440.98748779296875, "loss": 0.5678, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.5885382890701294, "rewards/margins": 0.5099713802337646, "rewards/rejected": -1.098509669303894, "step": 1580 }, { "epoch": 1.66, "learning_rate": 3.8442123315565477e-07, "logits/chosen": -2.0826640129089355, "logits/rejected": -2.0363707542419434, "logps/chosen": -391.0645751953125, "logps/rejected": -404.0660095214844, "loss": 0.5686, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6791858673095703, "rewards/margins": 0.4309251308441162, "rewards/rejected": -1.1101109981536865, "step": 1590 }, { "epoch": 1.67, "learning_rate": 3.830029431003718e-07, "logits/chosen": -2.1413321495056152, "logits/rejected": -2.0600619316101074, "logps/chosen": -392.31353759765625, "logps/rejected": -388.4493103027344, "loss": 0.5655, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.61674964427948, "rewards/margins": 0.5000275373458862, "rewards/rejected": -1.1167770624160767, "step": 1600 }, { "epoch": 1.67, "eval_logits/chosen": -2.1858322620391846, "eval_logits/rejected": -2.0877087116241455, "eval_logps/chosen": -414.4089660644531, "eval_logps/rejected": -414.78515625, "eval_loss": 0.5924570560455322, "eval_rewards/accuracies": 0.7242063283920288, "eval_rewards/chosen": -0.6504923701286316, "eval_rewards/margins": 0.41839832067489624, "eval_rewards/rejected": -1.0688906908035278, "eval_runtime": 373.9388, "eval_samples_per_second": 5.348, "eval_steps_per_second": 0.168, "step": 1600 }, { "epoch": 1.68, "learning_rate": 3.81578657160364e-07, "logits/chosen": -2.0504841804504395, "logits/rejected": -2.0568032264709473, "logps/chosen": -396.2165222167969, "logps/rejected": -428.9229431152344, "loss": 0.5529, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.6419362425804138, "rewards/margins": 0.47833889722824097, "rewards/rejected": -1.1202751398086548, "step": 1610 }, { "epoch": 1.7, "learning_rate": 3.801484395436412e-07, "logits/chosen": -2.210151195526123, "logits/rejected": -2.111720561981201, "logps/chosen": -431.15216064453125, "logps/rejected": -404.6205139160156, "loss": 0.5567, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.7074218392372131, "rewards/margins": 0.4450675845146179, "rewards/rejected": -1.152489423751831, "step": 1620 }, { "epoch": 1.71, "learning_rate": 3.787123547256185e-07, "logits/chosen": -2.1102566719055176, "logits/rejected": -2.0364174842834473, "logps/chosen": -426.64361572265625, "logps/rejected": -438.0272521972656, "loss": 0.547, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.7283273339271545, "rewards/margins": 0.42402735352516174, "rewards/rejected": -1.1523545980453491, "step": 1630 }, { "epoch": 1.72, "learning_rate": 3.7727046744620953e-07, "logits/chosen": -2.1615149974823, "logits/rejected": -2.052468776702881, "logps/chosen": -399.452880859375, "logps/rejected": -403.0694580078125, "loss": 0.5675, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.5816777944564819, "rewards/margins": 0.4937531352043152, "rewards/rejected": -1.075430989265442, "step": 1640 }, { "epoch": 1.73, "learning_rate": 3.7582284270690747e-07, "logits/chosen": -2.19050931930542, "logits/rejected": -2.093792200088501, "logps/chosen": -443.578857421875, "logps/rejected": -413.1700134277344, "loss": 0.5751, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.643337607383728, "rewards/margins": 0.384897381067276, "rewards/rejected": -1.0282350778579712, "step": 1650 }, { "epoch": 1.74, "learning_rate": 3.7436954576785503e-07, "logits/chosen": -2.136133909225464, "logits/rejected": -2.069423198699951, "logps/chosen": -379.0152587890625, "logps/rejected": -400.8043518066406, "loss": 0.5684, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6269843578338623, "rewards/margins": 0.5023621916770935, "rewards/rejected": -1.1293466091156006, "step": 1660 }, { "epoch": 1.75, "learning_rate": 3.7291064214490274e-07, "logits/chosen": -2.18449068069458, "logits/rejected": -2.0997793674468994, "logps/chosen": -401.9382019042969, "logps/rejected": -396.7825012207031, "loss": 0.5718, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.5713291764259338, "rewards/margins": 0.4387635290622711, "rewards/rejected": -1.0100927352905273, "step": 1670 }, { "epoch": 1.76, "learning_rate": 3.714461976066549e-07, "logits/chosen": -2.199491500854492, "logits/rejected": -2.050266742706299, "logps/chosen": -434.72210693359375, "logps/rejected": -408.1536560058594, "loss": 0.5647, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.5964989066123962, "rewards/margins": 0.4263014793395996, "rewards/rejected": -1.0228004455566406, "step": 1680 }, { "epoch": 1.77, "learning_rate": 3.699762781715051e-07, "logits/chosen": -2.078326463699341, "logits/rejected": -2.058079957962036, "logps/chosen": -373.3815002441406, "logps/rejected": -401.06793212890625, "loss": 0.5689, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5760546922683716, "rewards/margins": 0.4842708110809326, "rewards/rejected": -1.0603255033493042, "step": 1690 }, { "epoch": 1.78, "learning_rate": 3.6850095010465976e-07, "logits/chosen": -2.146766185760498, "logits/rejected": -2.088407516479492, "logps/chosen": -408.16387939453125, "logps/rejected": -418.07513427734375, "loss": 0.5364, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5772097706794739, "rewards/margins": 0.573888897895813, "rewards/rejected": -1.1510984897613525, "step": 1700 }, { "epoch": 1.78, "eval_logits/chosen": -2.149921417236328, "eval_logits/rejected": -2.050013780593872, "eval_logps/chosen": -425.4825134277344, "eval_logps/rejected": -428.4342041015625, "eval_loss": 0.5872865915298462, "eval_rewards/accuracies": 0.726190447807312, "eval_rewards/chosen": -0.7612276077270508, "eval_rewards/margins": 0.44415298104286194, "eval_rewards/rejected": -1.2053806781768799, "eval_runtime": 358.9295, "eval_samples_per_second": 5.572, "eval_steps_per_second": 0.176, "step": 1700 }, { "epoch": 1.79, "learning_rate": 3.670202799151511e-07, "logits/chosen": -2.130225658416748, "logits/rejected": -2.0438361167907715, "logps/chosen": -441.6221618652344, "logps/rejected": -462.5069274902344, "loss": 0.5669, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.7283957004547119, "rewards/margins": 0.4606415331363678, "rewards/rejected": -1.1890372037887573, "step": 1710 }, { "epoch": 1.8, "learning_rate": 3.6553433435283863e-07, "logits/chosen": -2.1264588832855225, "logits/rejected": -2.0698792934417725, "logps/chosen": -391.79986572265625, "logps/rejected": -420.40899658203125, "loss": 0.5828, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.7596856355667114, "rewards/margins": 0.4628881812095642, "rewards/rejected": -1.22257399559021, "step": 1720 }, { "epoch": 1.81, "learning_rate": 3.640431804054002e-07, "logits/chosen": -2.1733834743499756, "logits/rejected": -2.100900411605835, "logps/chosen": -413.070068359375, "logps/rejected": -454.635009765625, "loss": 0.5688, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.6341695189476013, "rewards/margins": 0.45797285437583923, "rewards/rejected": -1.0921423435211182, "step": 1730 }, { "epoch": 1.82, "learning_rate": 3.6254688529531195e-07, "logits/chosen": -2.187265396118164, "logits/rejected": -2.078583240509033, "logps/chosen": -394.0973815917969, "logps/rejected": -415.5884704589844, "loss": 0.5537, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.6361076831817627, "rewards/margins": 0.5312715768814087, "rewards/rejected": -1.1673791408538818, "step": 1740 }, { "epoch": 1.83, "learning_rate": 3.610455164768181e-07, "logits/chosen": -2.1528546810150146, "logits/rejected": -2.012460947036743, "logps/chosen": -445.7840270996094, "logps/rejected": -415.9200744628906, "loss": 0.5585, "rewards/accuracies": 0.75, "rewards/chosen": -0.7289555668830872, "rewards/margins": 0.4902923107147217, "rewards/rejected": -1.219247817993164, "step": 1750 }, { "epoch": 1.84, "learning_rate": 3.595391416328897e-07, "logits/chosen": -2.0355443954467773, "logits/rejected": -1.9784704446792603, "logps/chosen": -363.8067626953125, "logps/rejected": -387.1481628417969, "loss": 0.5571, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.7023047208786011, "rewards/margins": 0.4179447293281555, "rewards/rejected": -1.1202495098114014, "step": 1760 }, { "epoch": 1.85, "learning_rate": 3.580278286721738e-07, "logits/chosen": -2.1093432903289795, "logits/rejected": -2.0327229499816895, "logps/chosen": -422.77447509765625, "logps/rejected": -428.30035400390625, "loss": 0.5745, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6180187463760376, "rewards/margins": 0.4408366084098816, "rewards/rejected": -1.0588552951812744, "step": 1770 }, { "epoch": 1.86, "learning_rate": 3.56511645725932e-07, "logits/chosen": -2.1519556045532227, "logits/rejected": -2.0771679878234863, "logps/chosen": -400.14923095703125, "logps/rejected": -423.17919921875, "loss": 0.5574, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.5555980205535889, "rewards/margins": 0.5850681066513062, "rewards/rejected": -1.1406662464141846, "step": 1780 }, { "epoch": 1.87, "learning_rate": 3.549906611449688e-07, "logits/chosen": -2.179636001586914, "logits/rejected": -2.0839486122131348, "logps/chosen": -403.0487365722656, "logps/rejected": -393.1607971191406, "loss": 0.5519, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.5947157144546509, "rewards/margins": 0.4571925699710846, "rewards/rejected": -1.051908254623413, "step": 1790 }, { "epoch": 1.88, "learning_rate": 3.534649434965505e-07, "logits/chosen": -2.122799873352051, "logits/rejected": -2.0291850566864014, "logps/chosen": -423.5863342285156, "logps/rejected": -406.68280029296875, "loss": 0.5702, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.7036025524139404, "rewards/margins": 0.47179120779037476, "rewards/rejected": -1.1753937005996704, "step": 1800 }, { "epoch": 1.88, "eval_logits/chosen": -2.1546154022216797, "eval_logits/rejected": -2.053884267807007, "eval_logps/chosen": -424.3879089355469, "eval_logps/rejected": -429.0813903808594, "eval_loss": 0.5842701196670532, "eval_rewards/accuracies": 0.7361111044883728, "eval_rewards/chosen": -0.7502815127372742, "eval_rewards/margins": 0.4615708589553833, "eval_rewards/rejected": -1.2118524312973022, "eval_runtime": 384.6527, "eval_samples_per_second": 5.199, "eval_steps_per_second": 0.164, "step": 1800 }, { "epoch": 1.89, "learning_rate": 3.5193456156131394e-07, "logits/chosen": -2.099229097366333, "logits/rejected": -2.0521140098571777, "logps/chosen": -401.4447021484375, "logps/rejected": -410.57550048828125, "loss": 0.5632, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7107268571853638, "rewards/margins": 0.4665645658969879, "rewards/rejected": -1.1772915124893188, "step": 1810 }, { "epoch": 1.9, "learning_rate": 3.503995843301662e-07, "logits/chosen": -2.123899459838867, "logits/rejected": -1.977447509765625, "logps/chosen": -439.928466796875, "logps/rejected": -431.59295654296875, "loss": 0.5532, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.775931715965271, "rewards/margins": 0.5444117188453674, "rewards/rejected": -1.3203436136245728, "step": 1820 }, { "epoch": 1.92, "learning_rate": 3.488600810011739e-07, "logits/chosen": -2.1486592292785645, "logits/rejected": -2.0278477668762207, "logps/chosen": -425.73101806640625, "logps/rejected": -444.21044921875, "loss": 0.563, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6286954879760742, "rewards/margins": 0.5148676633834839, "rewards/rejected": -1.143563151359558, "step": 1830 }, { "epoch": 1.93, "learning_rate": 3.4731612097644425e-07, "logits/chosen": -2.12416410446167, "logits/rejected": -1.9885361194610596, "logps/chosen": -420.1853942871094, "logps/rejected": -403.20587158203125, "loss": 0.5727, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.6813632845878601, "rewards/margins": 0.4498722553253174, "rewards/rejected": -1.1312355995178223, "step": 1840 }, { "epoch": 1.94, "learning_rate": 3.4576777385899567e-07, "logits/chosen": -2.059755802154541, "logits/rejected": -2.0208868980407715, "logps/chosen": -406.7179260253906, "logps/rejected": -423.005126953125, "loss": 0.548, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.5665189623832703, "rewards/margins": 0.44578060507774353, "rewards/rejected": -1.0122995376586914, "step": 1850 }, { "epoch": 1.95, "learning_rate": 3.4421510944962075e-07, "logits/chosen": -2.1009135246276855, "logits/rejected": -2.0589375495910645, "logps/chosen": -414.166015625, "logps/rejected": -463.72100830078125, "loss": 0.5715, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.7198529243469238, "rewards/margins": 0.3939817547798157, "rewards/rejected": -1.1138347387313843, "step": 1860 }, { "epoch": 1.96, "learning_rate": 3.4265819774373923e-07, "logits/chosen": -2.1253786087036133, "logits/rejected": -2.0618138313293457, "logps/chosen": -407.43878173828125, "logps/rejected": -423.29974365234375, "loss": 0.5431, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.6462190747261047, "rewards/margins": 0.46022695302963257, "rewards/rejected": -1.1064460277557373, "step": 1870 }, { "epoch": 1.97, "learning_rate": 3.410971089282423e-07, "logits/chosen": -2.095548391342163, "logits/rejected": -2.008098602294922, "logps/chosen": -416.75262451171875, "logps/rejected": -424.6083984375, "loss": 0.5702, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.7109544277191162, "rewards/margins": 0.5041101574897766, "rewards/rejected": -1.215064287185669, "step": 1880 }, { "epoch": 1.98, "learning_rate": 3.395319133783289e-07, "logits/chosen": -2.039357900619507, "logits/rejected": -1.9043476581573486, "logps/chosen": -384.81793212890625, "logps/rejected": -385.3327331542969, "loss": 0.5724, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.5902493000030518, "rewards/margins": 0.43905600905418396, "rewards/rejected": -1.0293052196502686, "step": 1890 }, { "epoch": 1.99, "learning_rate": 3.3796268165433314e-07, "logits/chosen": -2.0562853813171387, "logits/rejected": -2.0060746669769287, "logps/chosen": -386.97503662109375, "logps/rejected": -424.0105895996094, "loss": 0.5505, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.6826264262199402, "rewards/margins": 0.4815604090690613, "rewards/rejected": -1.1641868352890015, "step": 1900 }, { "epoch": 1.99, "eval_logits/chosen": -2.133981227874756, "eval_logits/rejected": -2.032824993133545, "eval_logps/chosen": -413.9261169433594, "eval_logps/rejected": -417.8120422363281, "eval_loss": 0.5851796865463257, "eval_rewards/accuracies": 0.7321428656578064, "eval_rewards/chosen": -0.6456640958786011, "eval_rewards/margins": 0.4534952640533447, "eval_rewards/rejected": -1.0991593599319458, "eval_runtime": 341.8323, "eval_samples_per_second": 5.851, "eval_steps_per_second": 0.184, "step": 1900 }, { "epoch": 2.0, "learning_rate": 3.363894844985432e-07, "logits/chosen": -2.1606099605560303, "logits/rejected": -2.1270124912261963, "logps/chosen": -408.40240478515625, "logps/rejected": -433.38653564453125, "loss": 0.5841, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.6379219889640808, "rewards/margins": 0.37952426075935364, "rewards/rejected": -1.0174461603164673, "step": 1910 }, { "epoch": 2.01, "learning_rate": 3.3481239283201205e-07, "logits/chosen": -2.0863089561462402, "logits/rejected": -1.9511423110961914, "logps/chosen": -434.5306091308594, "logps/rejected": -442.3555603027344, "loss": 0.5229, "rewards/accuracies": 0.75, "rewards/chosen": -0.7100010514259338, "rewards/margins": 0.5080522298812866, "rewards/rejected": -1.2180532217025757, "step": 1920 }, { "epoch": 2.02, "learning_rate": 3.332314777513608e-07, "logits/chosen": -2.0927722454071045, "logits/rejected": -1.9584945440292358, "logps/chosen": -425.11480712890625, "logps/rejected": -425.4661560058594, "loss": 0.5512, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.7203429937362671, "rewards/margins": 0.5184942483901978, "rewards/rejected": -1.2388372421264648, "step": 1930 }, { "epoch": 2.03, "learning_rate": 3.3164681052557315e-07, "logits/chosen": -2.045835494995117, "logits/rejected": -1.8995403051376343, "logps/chosen": -425.0332946777344, "logps/rejected": -431.9063415527344, "loss": 0.5476, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.7351408004760742, "rewards/margins": 0.5671769976615906, "rewards/rejected": -1.30231773853302, "step": 1940 }, { "epoch": 2.04, "learning_rate": 3.3005846259278257e-07, "logits/chosen": -1.9578487873077393, "logits/rejected": -1.9154150485992432, "logps/chosen": -363.75, "logps/rejected": -405.8369445800781, "loss": 0.5311, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.7119430899620056, "rewards/margins": 0.5486541986465454, "rewards/rejected": -1.2605974674224854, "step": 1950 }, { "epoch": 2.05, "learning_rate": 3.2846650555705207e-07, "logits/chosen": -2.09869384765625, "logits/rejected": -2.004453420639038, "logps/chosen": -420.51593017578125, "logps/rejected": -439.56475830078125, "loss": 0.5468, "rewards/accuracies": 0.71875, "rewards/chosen": -0.770484447479248, "rewards/margins": 0.5066950917243958, "rewards/rejected": -1.2771797180175781, "step": 1960 }, { "epoch": 2.06, "learning_rate": 3.268710111851459e-07, "logits/chosen": -2.1401185989379883, "logits/rejected": -2.0326156616210938, "logps/chosen": -423.18609619140625, "logps/rejected": -450.2783203125, "loss": 0.5479, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.8261698484420776, "rewards/margins": 0.5264754295349121, "rewards/rejected": -1.3526453971862793, "step": 1970 }, { "epoch": 2.07, "learning_rate": 3.252720514032946e-07, "logits/chosen": -2.0674030780792236, "logits/rejected": -1.956199049949646, "logps/chosen": -419.334716796875, "logps/rejected": -439.58056640625, "loss": 0.5457, "rewards/accuracies": 0.71875, "rewards/chosen": -0.7481150031089783, "rewards/margins": 0.5188611745834351, "rewards/rejected": -1.2669761180877686, "step": 1980 }, { "epoch": 2.08, "learning_rate": 3.236696982939521e-07, "logits/chosen": -2.081023693084717, "logits/rejected": -2.012528419494629, "logps/chosen": -398.89764404296875, "logps/rejected": -420.46185302734375, "loss": 0.5421, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.7415474057197571, "rewards/margins": 0.4471089243888855, "rewards/rejected": -1.188656210899353, "step": 1990 }, { "epoch": 2.09, "learning_rate": 3.2206402409254655e-07, "logits/chosen": -1.9845138788223267, "logits/rejected": -1.8962827920913696, "logps/chosen": -381.1123352050781, "logps/rejected": -405.03948974609375, "loss": 0.5389, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.640026330947876, "rewards/margins": 0.5347259044647217, "rewards/rejected": -1.1747523546218872, "step": 2000 }, { "epoch": 2.09, "eval_logits/chosen": -2.080613374710083, "eval_logits/rejected": -1.976927399635315, "eval_logps/chosen": -422.3401794433594, "eval_logps/rejected": -427.39385986328125, "eval_loss": 0.5828012228012085, "eval_rewards/accuracies": 0.726190447807312, "eval_rewards/chosen": -0.7298040390014648, "eval_rewards/margins": 0.4651729166507721, "eval_rewards/rejected": -1.1949769258499146, "eval_runtime": 350.2329, "eval_samples_per_second": 5.71, "eval_steps_per_second": 0.18, "step": 2000 }, { "epoch": 2.1, "learning_rate": 3.204551011842237e-07, "logits/chosen": -2.084751605987549, "logits/rejected": -1.9851831197738647, "logps/chosen": -423.9139099121094, "logps/rejected": -449.2900390625, "loss": 0.5353, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.6902648210525513, "rewards/margins": 0.5586960911750793, "rewards/rejected": -1.2489607334136963, "step": 2010 }, { "epoch": 2.11, "learning_rate": 3.188430021005837e-07, "logits/chosen": -2.000121831893921, "logits/rejected": -1.9606053829193115, "logps/chosen": -390.7865905761719, "logps/rejected": -434.36004638671875, "loss": 0.5281, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.7369731068611145, "rewards/margins": 0.5088338255882263, "rewards/rejected": -1.2458069324493408, "step": 2020 }, { "epoch": 2.12, "learning_rate": 3.172277995164112e-07, "logits/chosen": -2.0466647148132324, "logits/rejected": -1.9290826320648193, "logps/chosen": -436.10076904296875, "logps/rejected": -415.53369140625, "loss": 0.5291, "rewards/accuracies": 0.6875, "rewards/chosen": -0.711915135383606, "rewards/margins": 0.4383808970451355, "rewards/rejected": -1.1502960920333862, "step": 2030 }, { "epoch": 2.14, "learning_rate": 3.156095662463998e-07, "logits/chosen": -1.9822591543197632, "logits/rejected": -1.9326177835464478, "logps/chosen": -386.89080810546875, "logps/rejected": -449.0328674316406, "loss": 0.5138, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8381987810134888, "rewards/margins": 0.5326210260391235, "rewards/rejected": -1.3708198070526123, "step": 2040 }, { "epoch": 2.15, "learning_rate": 3.139883752418682e-07, "logits/chosen": -1.9811346530914307, "logits/rejected": -1.902254343032837, "logps/chosen": -441.2205505371094, "logps/rejected": -464.0350036621094, "loss": 0.5244, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.8369643092155457, "rewards/margins": 0.5579534769058228, "rewards/rejected": -1.3949177265167236, "step": 2050 }, { "epoch": 2.16, "learning_rate": 3.1236429958747294e-07, "logits/chosen": -1.9652820825576782, "logits/rejected": -1.8512372970581055, "logps/chosen": -416.72607421875, "logps/rejected": -415.53912353515625, "loss": 0.5416, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9153169393539429, "rewards/margins": 0.4332866668701172, "rewards/rejected": -1.34860360622406, "step": 2060 }, { "epoch": 2.17, "learning_rate": 3.107374124979127e-07, "logits/chosen": -2.0142264366149902, "logits/rejected": -1.8807601928710938, "logps/chosen": -388.02777099609375, "logps/rejected": -400.16973876953125, "loss": 0.5307, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7066773176193237, "rewards/margins": 0.537675678730011, "rewards/rejected": -1.2443530559539795, "step": 2070 }, { "epoch": 2.18, "learning_rate": 3.0910778731462807e-07, "logits/chosen": -2.0407018661499023, "logits/rejected": -1.960519552230835, "logps/chosen": -405.73577880859375, "logps/rejected": -430.8636169433594, "loss": 0.5371, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.6238777041435242, "rewards/margins": 0.5529268980026245, "rewards/rejected": -1.176804542541504, "step": 2080 }, { "epoch": 2.19, "learning_rate": 3.0747549750249517e-07, "logits/chosen": -2.1523895263671875, "logits/rejected": -1.9908783435821533, "logps/chosen": -470.22454833984375, "logps/rejected": -458.03350830078125, "loss": 0.5252, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.7559888958930969, "rewards/margins": 0.6371704339981079, "rewards/rejected": -1.3931593894958496, "step": 2090 }, { "epoch": 2.2, "learning_rate": 3.058406166465139e-07, "logits/chosen": -2.0282669067382812, "logits/rejected": -1.9857925176620483, "logps/chosen": -439.1322326660156, "logps/rejected": -473.20806884765625, "loss": 0.531, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.8369047045707703, "rewards/margins": 0.5290185213088989, "rewards/rejected": -1.3659231662750244, "step": 2100 }, { "epoch": 2.2, "eval_logits/chosen": -2.056457281112671, "eval_logits/rejected": -1.9511338472366333, "eval_logps/chosen": -437.7683410644531, "eval_logps/rejected": -446.1321716308594, "eval_loss": 0.5804704427719116, "eval_rewards/accuracies": 0.7341269850730896, "eval_rewards/chosen": -0.8840858936309814, "eval_rewards/margins": 0.4982740879058838, "eval_rewards/rejected": -1.3823601007461548, "eval_runtime": 346.9914, "eval_samples_per_second": 5.764, "eval_steps_per_second": 0.182, "step": 2100 }, { "epoch": 2.21, "learning_rate": 3.0420321844849056e-07, "logits/chosen": -2.0606753826141357, "logits/rejected": -1.9562078714370728, "logps/chosen": -453.8863830566406, "logps/rejected": -467.1893615722656, "loss": 0.5282, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.8377038836479187, "rewards/margins": 0.6110423803329468, "rewards/rejected": -1.4487463235855103, "step": 2110 }, { "epoch": 2.22, "learning_rate": 3.0256337672371543e-07, "logits/chosen": -2.0410220623016357, "logits/rejected": -2.0064282417297363, "logps/chosen": -400.1402282714844, "logps/rejected": -408.17462158203125, "loss": 0.5285, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7289966344833374, "rewards/margins": 0.5061737298965454, "rewards/rejected": -1.2351701259613037, "step": 2120 }, { "epoch": 2.23, "learning_rate": 3.0092116539763487e-07, "logits/chosen": -2.0146710872650146, "logits/rejected": -1.920397162437439, "logps/chosen": -437.10235595703125, "logps/rejected": -466.9913635253906, "loss": 0.5066, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.8071281313896179, "rewards/margins": 0.6220065355300903, "rewards/rejected": -1.429134726524353, "step": 2130 }, { "epoch": 2.24, "learning_rate": 2.99276658502519e-07, "logits/chosen": -1.964906096458435, "logits/rejected": -1.9002695083618164, "logps/chosen": -375.3905944824219, "logps/rejected": -396.67144775390625, "loss": 0.5269, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.7183672189712524, "rewards/margins": 0.5746656656265259, "rewards/rejected": -1.2930328845977783, "step": 2140 }, { "epoch": 2.25, "learning_rate": 2.9762993017412404e-07, "logits/chosen": -2.0477283000946045, "logits/rejected": -1.9289798736572266, "logps/chosen": -438.35833740234375, "logps/rejected": -434.1504821777344, "loss": 0.5129, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.7393466234207153, "rewards/margins": 0.5844893455505371, "rewards/rejected": -1.323835849761963, "step": 2150 }, { "epoch": 2.26, "learning_rate": 2.959810546483505e-07, "logits/chosen": -2.0242910385131836, "logits/rejected": -1.91313898563385, "logps/chosen": -394.24859619140625, "logps/rejected": -425.3580017089844, "loss": 0.5063, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.703346312046051, "rewards/margins": 0.6648355722427368, "rewards/rejected": -1.3681819438934326, "step": 2160 }, { "epoch": 2.27, "learning_rate": 2.94330106257896e-07, "logits/chosen": -2.034824848175049, "logits/rejected": -1.9532169103622437, "logps/chosen": -418.4927673339844, "logps/rejected": -450.23297119140625, "loss": 0.4943, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.9016658663749695, "rewards/margins": 0.5852879285812378, "rewards/rejected": -1.486953854560852, "step": 2170 }, { "epoch": 2.28, "learning_rate": 2.92677159428905e-07, "logits/chosen": -2.0668792724609375, "logits/rejected": -1.9423834085464478, "logps/chosen": -431.8003845214844, "logps/rejected": -459.1951599121094, "loss": 0.5502, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.950252890586853, "rewards/margins": 0.5695884227752686, "rewards/rejected": -1.5198414325714111, "step": 2180 }, { "epoch": 2.29, "learning_rate": 2.9102228867761297e-07, "logits/chosen": -1.9795408248901367, "logits/rejected": -1.8501724004745483, "logps/chosen": -449.26910400390625, "logps/rejected": -450.2264099121094, "loss": 0.5187, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.9034382700920105, "rewards/margins": 0.6167550086975098, "rewards/rejected": -1.5201932191848755, "step": 2190 }, { "epoch": 2.3, "learning_rate": 2.8936556860698764e-07, "logits/chosen": -1.9710218906402588, "logits/rejected": -1.8519203662872314, "logps/chosen": -418.4292907714844, "logps/rejected": -471.595703125, "loss": 0.5162, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.7069419622421265, "rewards/margins": 0.6886085867881775, "rewards/rejected": -1.3955506086349487, "step": 2200 }, { "epoch": 2.3, "eval_logits/chosen": -2.018048048019409, "eval_logits/rejected": -1.9111573696136475, "eval_logps/chosen": -435.0021667480469, "eval_logps/rejected": -443.46435546875, "eval_loss": 0.5830379724502563, "eval_rewards/accuracies": 0.7341269850730896, "eval_rewards/chosen": -0.8564241528511047, "eval_rewards/margins": 0.4992583692073822, "eval_rewards/rejected": -1.355682611465454, "eval_runtime": 366.1679, "eval_samples_per_second": 5.462, "eval_steps_per_second": 0.172, "step": 2200 }, { "epoch": 2.31, "learning_rate": 2.8770707390336545e-07, "logits/chosen": -2.042503833770752, "logits/rejected": -1.9252078533172607, "logps/chosen": -412.6761169433594, "logps/rejected": -434.27337646484375, "loss": 0.5181, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.7401344180107117, "rewards/margins": 0.6476941108703613, "rewards/rejected": -1.3878285884857178, "step": 2210 }, { "epoch": 2.32, "learning_rate": 2.860468793330849e-07, "logits/chosen": -1.983633041381836, "logits/rejected": -1.8197529315948486, "logps/chosen": -450.23626708984375, "logps/rejected": -447.0399475097656, "loss": 0.4956, "rewards/accuracies": 0.71875, "rewards/chosen": -0.8933134078979492, "rewards/margins": 0.6681145429611206, "rewards/rejected": -1.5614279508590698, "step": 2220 }, { "epoch": 2.33, "learning_rate": 2.843850597391159e-07, "logits/chosen": -2.0030102729797363, "logits/rejected": -1.901752233505249, "logps/chosen": -439.53912353515625, "logps/rejected": -469.72259521484375, "loss": 0.5302, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.8122542500495911, "rewards/margins": 0.6632959842681885, "rewards/rejected": -1.4755501747131348, "step": 2230 }, { "epoch": 2.34, "learning_rate": 2.827216900376857e-07, "logits/chosen": -1.844321846961975, "logits/rejected": -1.7138290405273438, "logps/chosen": -439.25732421875, "logps/rejected": -470.17889404296875, "loss": 0.4927, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.8151344060897827, "rewards/margins": 0.7723340392112732, "rewards/rejected": -1.5874683856964111, "step": 2240 }, { "epoch": 2.35, "learning_rate": 2.810568452149019e-07, "logits/chosen": -2.096543312072754, "logits/rejected": -1.9603042602539062, "logps/chosen": -471.48907470703125, "logps/rejected": -477.1119689941406, "loss": 0.5464, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.8581037521362305, "rewards/margins": 0.599617600440979, "rewards/rejected": -1.457721471786499, "step": 2250 }, { "epoch": 2.37, "learning_rate": 2.793906003233714e-07, "logits/chosen": -2.0717244148254395, "logits/rejected": -1.9762405157089233, "logps/chosen": -421.10687255859375, "logps/rejected": -447.38873291015625, "loss": 0.5298, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.7920399904251099, "rewards/margins": 0.5180048942565918, "rewards/rejected": -1.3100448846817017, "step": 2260 }, { "epoch": 2.38, "learning_rate": 2.77723030478818e-07, "logits/chosen": -1.9469770193099976, "logits/rejected": -1.9061082601547241, "logps/chosen": -380.3778076171875, "logps/rejected": -470.05694580078125, "loss": 0.5139, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.7958939075469971, "rewards/margins": 0.6314458250999451, "rewards/rejected": -1.4273395538330078, "step": 2270 }, { "epoch": 2.39, "learning_rate": 2.760542108566949e-07, "logits/chosen": -2.0161285400390625, "logits/rejected": -1.881219506263733, "logps/chosen": -457.6134338378906, "logps/rejected": -443.34747314453125, "loss": 0.54, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.7452512979507446, "rewards/margins": 0.5063202977180481, "rewards/rejected": -1.2515714168548584, "step": 2280 }, { "epoch": 2.4, "learning_rate": 2.7438421668879676e-07, "logits/chosen": -1.9628798961639404, "logits/rejected": -1.935486078262329, "logps/chosen": -380.9690246582031, "logps/rejected": -417.7799377441406, "loss": 0.5236, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.7742933630943298, "rewards/margins": 0.5208727121353149, "rewards/rejected": -1.295166254043579, "step": 2290 }, { "epoch": 2.41, "learning_rate": 2.7271312325986734e-07, "logits/chosen": -1.9569809436798096, "logits/rejected": -1.8591148853302002, "logps/chosen": -414.0211486816406, "logps/rejected": -455.06341552734375, "loss": 0.5297, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.8452693223953247, "rewards/margins": 0.6661224961280823, "rewards/rejected": -1.5113918781280518, "step": 2300 }, { "epoch": 2.41, "eval_logits/chosen": -1.9911383390426636, "eval_logits/rejected": -1.8837895393371582, "eval_logps/chosen": -448.7518615722656, "eval_logps/rejected": -459.412353515625, "eval_loss": 0.5794528722763062, "eval_rewards/accuracies": 0.7182539701461792, "eval_rewards/chosen": -0.9939210414886475, "eval_rewards/margins": 0.521240770816803, "eval_rewards/rejected": -1.5151617527008057, "eval_runtime": 387.0195, "eval_samples_per_second": 5.168, "eval_steps_per_second": 0.163, "step": 2300 }, { "epoch": 2.42, "learning_rate": 2.710410059042066e-07, "logits/chosen": -1.9845149517059326, "logits/rejected": -1.8956083059310913, "logps/chosen": -429.16522216796875, "logps/rejected": -471.6270446777344, "loss": 0.5208, "rewards/accuracies": 0.75, "rewards/chosen": -0.981905460357666, "rewards/margins": 0.7246658802032471, "rewards/rejected": -1.706571340560913, "step": 2310 }, { "epoch": 2.43, "learning_rate": 2.693679400022733e-07, "logits/chosen": -1.9099270105361938, "logits/rejected": -1.8261696100234985, "logps/chosen": -418.8258361816406, "logps/rejected": -440.328369140625, "loss": 0.5342, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.9699192047119141, "rewards/margins": 0.4898689389228821, "rewards/rejected": -1.4597880840301514, "step": 2320 }, { "epoch": 2.44, "learning_rate": 2.6769400097728797e-07, "logits/chosen": -1.98947274684906, "logits/rejected": -1.855577826499939, "logps/chosen": -429.56695556640625, "logps/rejected": -422.18768310546875, "loss": 0.5434, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.7829546332359314, "rewards/margins": 0.5939928293228149, "rewards/rejected": -1.3769476413726807, "step": 2330 }, { "epoch": 2.45, "learning_rate": 2.660192642918321e-07, "logits/chosen": -1.9994666576385498, "logits/rejected": -1.9309895038604736, "logps/chosen": -437.77703857421875, "logps/rejected": -442.0591735839844, "loss": 0.5328, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7046215534210205, "rewards/margins": 0.5308600068092346, "rewards/rejected": -1.2354816198349, "step": 2340 }, { "epoch": 2.46, "learning_rate": 2.643438054444462e-07, "logits/chosen": -1.9171969890594482, "logits/rejected": -1.8357493877410889, "logps/chosen": -449.2801818847656, "logps/rejected": -445.749755859375, "loss": 0.5248, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.7818909883499146, "rewards/margins": 0.6039397716522217, "rewards/rejected": -1.3858308792114258, "step": 2350 }, { "epoch": 2.47, "learning_rate": 2.626676999662269e-07, "logits/chosen": -1.9482982158660889, "logits/rejected": -1.8158115148544312, "logps/chosen": -417.89886474609375, "logps/rejected": -441.44964599609375, "loss": 0.545, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.8549971580505371, "rewards/margins": 0.5220503807067871, "rewards/rejected": -1.3770474195480347, "step": 2360 }, { "epoch": 2.48, "learning_rate": 2.60991023417421e-07, "logits/chosen": -1.9489740133285522, "logits/rejected": -1.7895710468292236, "logps/chosen": -433.61700439453125, "logps/rejected": -446.4412536621094, "loss": 0.5197, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.7963303923606873, "rewards/margins": 0.702279806137085, "rewards/rejected": -1.498610258102417, "step": 2370 }, { "epoch": 2.49, "learning_rate": 2.593138513840199e-07, "logits/chosen": -1.8876575231552124, "logits/rejected": -1.8479945659637451, "logps/chosen": -415.013916015625, "logps/rejected": -434.8460998535156, "loss": 0.532, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.9357647895812988, "rewards/margins": 0.4299185872077942, "rewards/rejected": -1.3656833171844482, "step": 2380 }, { "epoch": 2.5, "learning_rate": 2.576362594743518e-07, "logits/chosen": -1.9661098718643188, "logits/rejected": -1.857072114944458, "logps/chosen": -408.663818359375, "logps/rejected": -415.71331787109375, "loss": 0.5464, "rewards/accuracies": 0.78125, "rewards/chosen": -0.7987567186355591, "rewards/margins": 0.6057206392288208, "rewards/rejected": -1.4044773578643799, "step": 2390 }, { "epoch": 2.51, "learning_rate": 2.559583233156734e-07, "logits/chosen": -1.9324119091033936, "logits/rejected": -1.7781422138214111, "logps/chosen": -424.632080078125, "logps/rejected": -436.2330017089844, "loss": 0.5143, "rewards/accuracies": 0.75, "rewards/chosen": -0.8693172335624695, "rewards/margins": 0.5977479815483093, "rewards/rejected": -1.4670653343200684, "step": 2400 }, { "epoch": 2.51, "eval_logits/chosen": -1.9853116273880005, "eval_logits/rejected": -1.8784489631652832, "eval_logps/chosen": -436.2056884765625, "eval_logps/rejected": -445.7617492675781, "eval_loss": 0.5805792212486267, "eval_rewards/accuracies": 0.7321428656578064, "eval_rewards/chosen": -0.8684592843055725, "eval_rewards/margins": 0.5101962685585022, "eval_rewards/rejected": -1.3786555528640747, "eval_runtime": 338.8092, "eval_samples_per_second": 5.903, "eval_steps_per_second": 0.186, "step": 2400 }, { "epoch": 2.52, "learning_rate": 2.5428011855076023e-07, "logits/chosen": -1.9499645233154297, "logits/rejected": -1.8366508483886719, "logps/chosen": -436.2471618652344, "logps/rejected": -451.01507568359375, "loss": 0.5401, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.8622430562973022, "rewards/margins": 0.6568028926849365, "rewards/rejected": -1.5190460681915283, "step": 2410 }, { "epoch": 2.53, "learning_rate": 2.5260172083449693e-07, "logits/chosen": -2.0690159797668457, "logits/rejected": -1.9587256908416748, "logps/chosen": -443.0089416503906, "logps/rejected": -469.9134826660156, "loss": 0.4956, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.8210717439651489, "rewards/margins": 0.6067072153091431, "rewards/rejected": -1.427778959274292, "step": 2420 }, { "epoch": 2.54, "learning_rate": 2.509232058304666e-07, "logits/chosen": -1.95901358127594, "logits/rejected": -1.9063999652862549, "logps/chosen": -447.19708251953125, "logps/rejected": -482.42864990234375, "loss": 0.5353, "rewards/accuracies": 0.78125, "rewards/chosen": -0.8421271443367004, "rewards/margins": 0.6823471784591675, "rewards/rejected": -1.5244743824005127, "step": 2430 }, { "epoch": 2.55, "learning_rate": 2.492446492075396e-07, "logits/chosen": -1.949507474899292, "logits/rejected": -1.9200721979141235, "logps/chosen": -383.2177429199219, "logps/rejected": -432.87353515625, "loss": 0.5123, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.887121319770813, "rewards/margins": 0.6234883069992065, "rewards/rejected": -1.5106096267700195, "step": 2440 }, { "epoch": 2.56, "learning_rate": 2.475661266364628e-07, "logits/chosen": -1.8691284656524658, "logits/rejected": -1.7979360818862915, "logps/chosen": -446.46044921875, "logps/rejected": -468.6094665527344, "loss": 0.4934, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.869927704334259, "rewards/margins": 0.6707456111907959, "rewards/rejected": -1.5406733751296997, "step": 2450 }, { "epoch": 2.57, "learning_rate": 2.4588771378644754e-07, "logits/chosen": -2.0588347911834717, "logits/rejected": -1.9376767873764038, "logps/chosen": -473.5218811035156, "logps/rejected": -489.5943298339844, "loss": 0.496, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.9387062788009644, "rewards/margins": 0.6001947522163391, "rewards/rejected": -1.5389010906219482, "step": 2460 }, { "epoch": 2.59, "learning_rate": 2.4420948632175926e-07, "logits/chosen": -1.9621975421905518, "logits/rejected": -1.8935844898223877, "logps/chosen": -433.59033203125, "logps/rejected": -479.13861083984375, "loss": 0.5293, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.7825122475624084, "rewards/margins": 0.6647100448608398, "rewards/rejected": -1.447222113609314, "step": 2470 }, { "epoch": 2.6, "learning_rate": 2.4253151989830596e-07, "logits/chosen": -1.8841426372528076, "logits/rejected": -1.8874428272247314, "logps/chosen": -425.667724609375, "logps/rejected": -442.14306640625, "loss": 0.523, "rewards/accuracies": 0.65625, "rewards/chosen": -0.8772993087768555, "rewards/margins": 0.4364490509033203, "rewards/rejected": -1.3137483596801758, "step": 2480 }, { "epoch": 2.61, "learning_rate": 2.408538901602275e-07, "logits/chosen": -1.9295036792755127, "logits/rejected": -1.81784987449646, "logps/chosen": -408.718505859375, "logps/rejected": -428.29803466796875, "loss": 0.5225, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.8873177766799927, "rewards/margins": 0.5828737020492554, "rewards/rejected": -1.470191240310669, "step": 2490 }, { "epoch": 2.62, "learning_rate": 2.3917667273648594e-07, "logits/chosen": -1.9346414804458618, "logits/rejected": -1.8728351593017578, "logps/chosen": -439.614501953125, "logps/rejected": -444.1954650878906, "loss": 0.5377, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.9809148907661438, "rewards/margins": 0.43168848752975464, "rewards/rejected": -1.4126031398773193, "step": 2500 }, { "epoch": 2.62, "eval_logits/chosen": -1.9647775888442993, "eval_logits/rejected": -1.8571594953536987, "eval_logps/chosen": -443.1573791503906, "eval_logps/rejected": -454.7680358886719, "eval_loss": 0.5785647034645081, "eval_rewards/accuracies": 0.72817462682724, "eval_rewards/chosen": -0.9379764199256897, "eval_rewards/margins": 0.5307427644729614, "eval_rewards/rejected": -1.468719244003296, "eval_runtime": 338.5492, "eval_samples_per_second": 5.908, "eval_steps_per_second": 0.186, "step": 2500 }, { "epoch": 2.63, "learning_rate": 2.374999432374556e-07, "logits/chosen": -1.8711265325546265, "logits/rejected": -1.8591110706329346, "logps/chosen": -402.60113525390625, "logps/rejected": -465.11822509765625, "loss": 0.5378, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.9721585512161255, "rewards/margins": 0.5550946593284607, "rewards/rejected": -1.5272531509399414, "step": 2510 }, { "epoch": 2.64, "learning_rate": 2.3582377725151504e-07, "logits/chosen": -1.9033355712890625, "logits/rejected": -1.7703205347061157, "logps/chosen": -436.557861328125, "logps/rejected": -434.04351806640625, "loss": 0.5082, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.9090617299079895, "rewards/margins": 0.6059231758117676, "rewards/rejected": -1.5149848461151123, "step": 2520 }, { "epoch": 2.65, "learning_rate": 2.3414825034163877e-07, "logits/chosen": -1.9475148916244507, "logits/rejected": -1.9001047611236572, "logps/chosen": -474.4970703125, "logps/rejected": -480.343505859375, "loss": 0.5253, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.8905152082443237, "rewards/margins": 0.5469620227813721, "rewards/rejected": -1.4374772310256958, "step": 2530 }, { "epoch": 2.66, "learning_rate": 2.3247343804199176e-07, "logits/chosen": -1.8764568567276, "logits/rejected": -1.7902402877807617, "logps/chosen": -422.77581787109375, "logps/rejected": -479.0648498535156, "loss": 0.496, "rewards/accuracies": 0.84375, "rewards/chosen": -0.8904309272766113, "rewards/margins": 0.7856873273849487, "rewards/rejected": -1.67611825466156, "step": 2540 }, { "epoch": 2.67, "learning_rate": 2.3079941585452318e-07, "logits/chosen": -1.9895591735839844, "logits/rejected": -1.8538814783096313, "logps/chosen": -476.71563720703125, "logps/rejected": -475.23406982421875, "loss": 0.5143, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.8902314901351929, "rewards/margins": 0.617530107498169, "rewards/rejected": -1.5077615976333618, "step": 2550 }, { "epoch": 2.68, "learning_rate": 2.2912625924556366e-07, "logits/chosen": -1.8772594928741455, "logits/rejected": -1.8478418588638306, "logps/chosen": -429.35595703125, "logps/rejected": -496.8270568847656, "loss": 0.5236, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.8693763613700867, "rewards/margins": 0.6027558445930481, "rewards/rejected": -1.4721323251724243, "step": 2560 }, { "epoch": 2.69, "learning_rate": 2.2745404364242276e-07, "logits/chosen": -1.9632251262664795, "logits/rejected": -1.812048316001892, "logps/chosen": -458.9662170410156, "logps/rejected": -465.8284606933594, "loss": 0.5293, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9237990379333496, "rewards/margins": 0.5360890626907349, "rewards/rejected": -1.459887981414795, "step": 2570 }, { "epoch": 2.7, "learning_rate": 2.2578284442998854e-07, "logits/chosen": -1.8958606719970703, "logits/rejected": -1.7544406652450562, "logps/chosen": -471.21124267578125, "logps/rejected": -447.39581298828125, "loss": 0.5315, "rewards/accuracies": 0.75, "rewards/chosen": -0.9969693422317505, "rewards/margins": 0.6323047876358032, "rewards/rejected": -1.6292740106582642, "step": 2580 }, { "epoch": 2.71, "learning_rate": 2.2411273694732952e-07, "logits/chosen": -1.8865602016448975, "logits/rejected": -1.7838201522827148, "logps/chosen": -442.34429931640625, "logps/rejected": -468.42486572265625, "loss": 0.5237, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.9607971906661987, "rewards/margins": 0.6720181703567505, "rewards/rejected": -1.6328153610229492, "step": 2590 }, { "epoch": 2.72, "learning_rate": 2.224437964842979e-07, "logits/chosen": -1.8734734058380127, "logits/rejected": -1.7853962182998657, "logps/chosen": -408.92877197265625, "logps/rejected": -463.09503173828125, "loss": 0.4868, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.7472456097602844, "rewards/margins": 0.808722972869873, "rewards/rejected": -1.5559687614440918, "step": 2600 }, { "epoch": 2.72, "eval_logits/chosen": -1.9504142999649048, "eval_logits/rejected": -1.8415662050247192, "eval_logps/chosen": -439.4378662109375, "eval_logps/rejected": -450.51556396484375, "eval_loss": 0.579669177532196, "eval_rewards/accuracies": 0.7301587462425232, "eval_rewards/chosen": -0.9007813930511475, "eval_rewards/margins": 0.5254126787185669, "eval_rewards/rejected": -1.4261939525604248, "eval_runtime": 397.2665, "eval_samples_per_second": 5.034, "eval_steps_per_second": 0.159, "step": 2600 }, { "epoch": 2.73, "learning_rate": 2.2077609827813592e-07, "logits/chosen": -1.8535270690917969, "logits/rejected": -1.7869393825531006, "logps/chosen": -416.6768493652344, "logps/rejected": -461.3543395996094, "loss": 0.5043, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9135526418685913, "rewards/margins": 0.6141065359115601, "rewards/rejected": -1.5276591777801514, "step": 2610 }, { "epoch": 2.74, "learning_rate": 2.1910971751008347e-07, "logits/chosen": -1.897220253944397, "logits/rejected": -1.807562232017517, "logps/chosen": -446.760498046875, "logps/rejected": -471.2540588378906, "loss": 0.5076, "rewards/accuracies": 0.78125, "rewards/chosen": -0.9037901759147644, "rewards/margins": 0.6555687785148621, "rewards/rejected": -1.559358835220337, "step": 2620 }, { "epoch": 2.75, "learning_rate": 2.1744472930198977e-07, "logits/chosen": -1.9448814392089844, "logits/rejected": -1.871311902999878, "logps/chosen": -445.2456970214844, "logps/rejected": -482.9363708496094, "loss": 0.4983, "rewards/accuracies": 0.71875, "rewards/chosen": -0.9247520565986633, "rewards/margins": 0.561426043510437, "rewards/rejected": -1.4861780405044556, "step": 2630 }, { "epoch": 2.76, "learning_rate": 2.1578120871292553e-07, "logits/chosen": -1.9801807403564453, "logits/rejected": -1.870661973953247, "logps/chosen": -468.67926025390625, "logps/rejected": -501.02392578125, "loss": 0.5288, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9676277041435242, "rewards/margins": 0.5955823659896851, "rewards/rejected": -1.5632102489471436, "step": 2640 }, { "epoch": 2.77, "learning_rate": 2.141192307358008e-07, "logits/chosen": -1.8639633655548096, "logits/rejected": -1.810063123703003, "logps/chosen": -423.62939453125, "logps/rejected": -425.0008239746094, "loss": 0.5144, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.8432968258857727, "rewards/margins": 0.6521421074867249, "rewards/rejected": -1.495439052581787, "step": 2650 }, { "epoch": 2.78, "learning_rate": 2.1245887029398247e-07, "logits/chosen": -1.9204838275909424, "logits/rejected": -1.8166393041610718, "logps/chosen": -424.845458984375, "logps/rejected": -459.1109313964844, "loss": 0.5224, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.9046141505241394, "rewards/margins": 0.6672372817993164, "rewards/rejected": -1.5718514919281006, "step": 2660 }, { "epoch": 2.79, "learning_rate": 2.108002022379184e-07, "logits/chosen": -1.9082868099212646, "logits/rejected": -1.8343610763549805, "logps/chosen": -463.3162536621094, "logps/rejected": -486.88018798828125, "loss": 0.5358, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.066706895828247, "rewards/margins": 0.5957939624786377, "rewards/rejected": -1.6625009775161743, "step": 2670 }, { "epoch": 2.8, "learning_rate": 2.0914330134176185e-07, "logits/chosen": -1.9134643077850342, "logits/rejected": -1.8917551040649414, "logps/chosen": -440.2626953125, "logps/rejected": -504.54150390625, "loss": 0.5004, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.9903669357299805, "rewards/margins": 0.610992968082428, "rewards/rejected": -1.6013599634170532, "step": 2680 }, { "epoch": 2.82, "learning_rate": 2.0748824230000098e-07, "logits/chosen": -1.8200502395629883, "logits/rejected": -1.6779934167861938, "logps/chosen": -431.42169189453125, "logps/rejected": -430.67974853515625, "loss": 0.5036, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.9627591967582703, "rewards/margins": 0.6116820573806763, "rewards/rejected": -1.5744411945343018, "step": 2690 }, { "epoch": 2.83, "learning_rate": 2.0583509972409186e-07, "logits/chosen": -1.8566009998321533, "logits/rejected": -1.7274389266967773, "logps/chosen": -421.042724609375, "logps/rejected": -423.007080078125, "loss": 0.5275, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.8959705233573914, "rewards/margins": 0.5554197430610657, "rewards/rejected": -1.4513903856277466, "step": 2700 }, { "epoch": 2.83, "eval_logits/chosen": -1.921860694885254, "eval_logits/rejected": -1.8117154836654663, "eval_logps/chosen": -447.6714172363281, "eval_logps/rejected": -460.6926574707031, "eval_loss": 0.575380265712738, "eval_rewards/accuracies": 0.72817462682724, "eval_rewards/chosen": -0.983116626739502, "eval_rewards/margins": 0.5448485016822815, "eval_rewards/rejected": -1.5279650688171387, "eval_runtime": 389.6361, "eval_samples_per_second": 5.133, "eval_steps_per_second": 0.162, "step": 2700 }, { "epoch": 2.84, "learning_rate": 2.0418394813909434e-07, "logits/chosen": -1.9249767065048218, "logits/rejected": -1.7549772262573242, "logps/chosen": -426.38470458984375, "logps/rejected": -445.249755859375, "loss": 0.5327, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.9240328669548035, "rewards/margins": 0.6637715101242065, "rewards/rejected": -1.5878043174743652, "step": 2710 }, { "epoch": 2.85, "learning_rate": 2.025348619803132e-07, "logits/chosen": -1.861976981163025, "logits/rejected": -1.747554063796997, "logps/chosen": -411.2970275878906, "logps/rejected": -425.08441162109375, "loss": 0.548, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.8863030672073364, "rewards/margins": 0.5835781693458557, "rewards/rejected": -1.4698810577392578, "step": 2720 }, { "epoch": 2.86, "learning_rate": 2.0088791558994143e-07, "logits/chosen": -1.8526477813720703, "logits/rejected": -1.8287807703018188, "logps/chosen": -447.93243408203125, "logps/rejected": -471.99273681640625, "loss": 0.5235, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.8392646908760071, "rewards/margins": 0.6029472351074219, "rewards/rejected": -1.4422122240066528, "step": 2730 }, { "epoch": 2.87, "learning_rate": 1.9924318321371013e-07, "logits/chosen": -1.9511082172393799, "logits/rejected": -1.810118317604065, "logps/chosen": -435.09991455078125, "logps/rejected": -457.4765625, "loss": 0.4957, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.8674267530441284, "rewards/margins": 0.7397493124008179, "rewards/rejected": -1.6071761846542358, "step": 2740 }, { "epoch": 2.88, "learning_rate": 1.976007389975401e-07, "logits/chosen": -1.909641981124878, "logits/rejected": -1.8077083826065063, "logps/chosen": -428.82171630859375, "logps/rejected": -457.6050720214844, "loss": 0.5122, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.8866938352584839, "rewards/margins": 0.6875293850898743, "rewards/rejected": -1.5742231607437134, "step": 2750 }, { "epoch": 2.89, "learning_rate": 1.959606569842006e-07, "logits/chosen": -1.8715641498565674, "logits/rejected": -1.7681375741958618, "logps/chosen": -402.3782958984375, "logps/rejected": -419.69970703125, "loss": 0.5167, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.9384256601333618, "rewards/margins": 0.5532727241516113, "rewards/rejected": -1.4916983842849731, "step": 2760 }, { "epoch": 2.9, "learning_rate": 1.9432301110997034e-07, "logits/chosen": -1.85273015499115, "logits/rejected": -1.7387834787368774, "logps/chosen": -444.2647399902344, "logps/rejected": -481.83685302734375, "loss": 0.4981, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.9170455932617188, "rewards/margins": 0.6824191212654114, "rewards/rejected": -1.5994646549224854, "step": 2770 }, { "epoch": 2.91, "learning_rate": 1.9268787520130504e-07, "logits/chosen": -1.7926514148712158, "logits/rejected": -1.67780339717865, "logps/chosen": -389.5690002441406, "logps/rejected": -420.969970703125, "loss": 0.5282, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.9584437608718872, "rewards/margins": 0.5365868806838989, "rewards/rejected": -1.4950306415557861, "step": 2780 }, { "epoch": 2.92, "learning_rate": 1.91055322971509e-07, "logits/chosen": -1.8190996646881104, "logits/rejected": -1.7043695449829102, "logps/chosen": -409.02398681640625, "logps/rejected": -439.34368896484375, "loss": 0.5061, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.8551441431045532, "rewards/margins": 0.780997633934021, "rewards/rejected": -1.6361417770385742, "step": 2790 }, { "epoch": 2.93, "learning_rate": 1.8942542801741207e-07, "logits/chosen": -1.9003918170928955, "logits/rejected": -1.779552698135376, "logps/chosen": -433.330810546875, "logps/rejected": -451.95916748046875, "loss": 0.5042, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9382666349411011, "rewards/margins": 0.6287131309509277, "rewards/rejected": -1.5669798851013184, "step": 2800 }, { "epoch": 2.93, "eval_logits/chosen": -1.9484288692474365, "eval_logits/rejected": -1.8400510549545288, "eval_logps/chosen": -447.79278564453125, "eval_logps/rejected": -460.85772705078125, "eval_loss": 0.574294924736023, "eval_rewards/accuracies": 0.7321428656578064, "eval_rewards/chosen": -0.984330415725708, "eval_rewards/margins": 0.5452856421470642, "eval_rewards/rejected": -1.5296159982681274, "eval_runtime": 367.54, "eval_samples_per_second": 5.442, "eval_steps_per_second": 0.171, "step": 2800 }, { "epoch": 2.94, "learning_rate": 1.8779826381605198e-07, "logits/chosen": -1.8743737936019897, "logits/rejected": -1.812796950340271, "logps/chosen": -486.4170837402344, "logps/rejected": -492.00811767578125, "loss": 0.5333, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.0714589357376099, "rewards/margins": 0.49343618750572205, "rewards/rejected": -1.5648950338363647, "step": 2810 }, { "epoch": 2.95, "learning_rate": 1.861739037213616e-07, "logits/chosen": -1.9613018035888672, "logits/rejected": -1.8247106075286865, "logps/chosen": -471.99530029296875, "logps/rejected": -517.2361450195312, "loss": 0.5048, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.8858569264411926, "rewards/margins": 0.7351481318473816, "rewards/rejected": -1.6210052967071533, "step": 2820 }, { "epoch": 2.96, "learning_rate": 1.845524209608627e-07, "logits/chosen": -1.8594707250595093, "logits/rejected": -1.7978969812393188, "logps/chosen": -451.421875, "logps/rejected": -477.1441955566406, "loss": 0.516, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.957187831401825, "rewards/margins": 0.6043495535850525, "rewards/rejected": -1.5615373849868774, "step": 2830 }, { "epoch": 2.97, "learning_rate": 1.8293388863236391e-07, "logits/chosen": -1.8253387212753296, "logits/rejected": -1.7428086996078491, "logps/chosen": -437.1249084472656, "logps/rejected": -436.38995361328125, "loss": 0.4964, "rewards/accuracies": 0.75, "rewards/chosen": -0.8720731735229492, "rewards/margins": 0.5980420708656311, "rewards/rejected": -1.4701151847839355, "step": 2840 }, { "epoch": 2.98, "learning_rate": 1.8131837970066635e-07, "logits/chosen": -1.9482589960098267, "logits/rejected": -1.8544782400131226, "logps/chosen": -473.46160888671875, "logps/rejected": -489.701904296875, "loss": 0.5122, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.9760125875473022, "rewards/margins": 0.6014086008071899, "rewards/rejected": -1.5774211883544922, "step": 2850 }, { "epoch": 2.99, "learning_rate": 1.7970596699427355e-07, "logits/chosen": -1.96894109249115, "logits/rejected": -1.8086011409759521, "logps/chosen": -452.5389099121094, "logps/rejected": -470.1752014160156, "loss": 0.4997, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9787393808364868, "rewards/margins": 0.6899208426475525, "rewards/rejected": -1.6686604022979736, "step": 2860 }, { "epoch": 3.0, "learning_rate": 1.7809672320210872e-07, "logits/chosen": -1.898374319076538, "logits/rejected": -1.8311573266983032, "logps/chosen": -472.875244140625, "logps/rejected": -486.66265869140625, "loss": 0.5019, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.9755460619926453, "rewards/margins": 0.6229265928268433, "rewards/rejected": -1.5984727144241333, "step": 2870 }, { "epoch": 3.01, "learning_rate": 1.7649072087023784e-07, "logits/chosen": -1.9573551416397095, "logits/rejected": -1.8683230876922607, "logps/chosen": -464.22247314453125, "logps/rejected": -508.75341796875, "loss": 0.4887, "rewards/accuracies": 0.78125, "rewards/chosen": -0.9004373550415039, "rewards/margins": 0.733317494392395, "rewards/rejected": -1.6337547302246094, "step": 2880 }, { "epoch": 3.02, "learning_rate": 1.748880323985989e-07, "logits/chosen": -1.904697060585022, "logits/rejected": -1.7625919580459595, "logps/chosen": -442.80084228515625, "logps/rejected": -461.4578552246094, "loss": 0.4727, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.9410102963447571, "rewards/margins": 0.7015891671180725, "rewards/rejected": -1.6425994634628296, "step": 2890 }, { "epoch": 3.04, "learning_rate": 1.7328873003773848e-07, "logits/chosen": -1.9040206670761108, "logits/rejected": -1.8098185062408447, "logps/chosen": -448.4427795410156, "logps/rejected": -453.7347717285156, "loss": 0.4862, "rewards/accuracies": 0.78125, "rewards/chosen": -1.0325968265533447, "rewards/margins": 0.6374907493591309, "rewards/rejected": -1.6700875759124756, "step": 2900 }, { "epoch": 3.04, "eval_logits/chosen": -1.9315091371536255, "eval_logits/rejected": -1.821599006652832, "eval_logps/chosen": -452.8863220214844, "eval_logps/rejected": -467.03509521484375, "eval_loss": 0.575552225112915, "eval_rewards/accuracies": 0.7202380895614624, "eval_rewards/chosen": -1.0352660417556763, "eval_rewards/margins": 0.5561232566833496, "eval_rewards/rejected": -1.5913892984390259, "eval_runtime": 368.824, "eval_samples_per_second": 5.423, "eval_steps_per_second": 0.171, "step": 2900 }, { "epoch": 3.05, "learning_rate": 1.7169288588555424e-07, "logits/chosen": -1.881466269493103, "logits/rejected": -1.747097373008728, "logps/chosen": -449.9195251464844, "logps/rejected": -475.69586181640625, "loss": 0.4603, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.9050649404525757, "rewards/margins": 0.9311367869377136, "rewards/rejected": -1.8362019062042236, "step": 2910 }, { "epoch": 3.06, "learning_rate": 1.701005718840453e-07, "logits/chosen": -1.882340431213379, "logits/rejected": -1.7553138732910156, "logps/chosen": -452.6888732910156, "logps/rejected": -472.3089904785156, "loss": 0.4984, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.981615424156189, "rewards/margins": 0.6442556977272034, "rewards/rejected": -1.6258710622787476, "step": 2920 }, { "epoch": 3.07, "learning_rate": 1.6851185981606795e-07, "logits/chosen": -1.8996105194091797, "logits/rejected": -1.7734209299087524, "logps/chosen": -448.3755798339844, "logps/rejected": -456.1847229003906, "loss": 0.4861, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.964205265045166, "rewards/margins": 0.7373130917549133, "rewards/rejected": -1.7015184164047241, "step": 2930 }, { "epoch": 3.08, "learning_rate": 1.669268213021009e-07, "logits/chosen": -1.9143121242523193, "logits/rejected": -1.822167158126831, "logps/chosen": -435.2249450683594, "logps/rejected": -487.91827392578125, "loss": 0.4969, "rewards/accuracies": 0.78125, "rewards/chosen": -0.9106824994087219, "rewards/margins": 0.8411850929260254, "rewards/rejected": -1.751867651939392, "step": 2940 }, { "epoch": 3.09, "learning_rate": 1.6534552779701555e-07, "logits/chosen": -1.7521066665649414, "logits/rejected": -1.6937494277954102, "logps/chosen": -430.65081787109375, "logps/rejected": -494.86737060546875, "loss": 0.4794, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.9239088892936707, "rewards/margins": 0.80866938829422, "rewards/rejected": -1.7325782775878906, "step": 2950 }, { "epoch": 3.1, "learning_rate": 1.6376805058685538e-07, "logits/chosen": -1.8456952571868896, "logits/rejected": -1.7061046361923218, "logps/chosen": -420.896728515625, "logps/rejected": -445.06024169921875, "loss": 0.5017, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9549886584281921, "rewards/margins": 0.6535457372665405, "rewards/rejected": -1.6085344552993774, "step": 2960 }, { "epoch": 3.11, "learning_rate": 1.6219446078562192e-07, "logits/chosen": -1.847887635231018, "logits/rejected": -1.7504163980484009, "logps/chosen": -455.67535400390625, "logps/rejected": -512.421142578125, "loss": 0.4828, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.9610303640365601, "rewards/margins": 0.8271835446357727, "rewards/rejected": -1.7882139682769775, "step": 2970 }, { "epoch": 3.12, "learning_rate": 1.6062482933206911e-07, "logits/chosen": -1.778282880783081, "logits/rejected": -1.736619234085083, "logps/chosen": -446.7540588378906, "logps/rejected": -529.2030029296875, "loss": 0.4945, "rewards/accuracies": 0.78125, "rewards/chosen": -1.0167171955108643, "rewards/margins": 0.7579668164253235, "rewards/rejected": -1.774683952331543, "step": 2980 }, { "epoch": 3.13, "learning_rate": 1.5905922698650536e-07, "logits/chosen": -1.877215027809143, "logits/rejected": -1.774298906326294, "logps/chosen": -445.44500732421875, "logps/rejected": -485.49560546875, "loss": 0.4743, "rewards/accuracies": 0.78125, "rewards/chosen": -0.9984437227249146, "rewards/margins": 0.8055132627487183, "rewards/rejected": -1.803957223892212, "step": 2990 }, { "epoch": 3.14, "learning_rate": 1.574977243276031e-07, "logits/chosen": -1.893512487411499, "logits/rejected": -1.7661199569702148, "logps/chosen": -487.65594482421875, "logps/rejected": -501.9541931152344, "loss": 0.4817, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.9194480180740356, "rewards/margins": 0.7937260866165161, "rewards/rejected": -1.7131742238998413, "step": 3000 }, { "epoch": 3.14, "eval_logits/chosen": -1.883595585823059, "eval_logits/rejected": -1.7716362476348877, "eval_logps/chosen": -453.8664245605469, "eval_logps/rejected": -469.6033630371094, "eval_loss": 0.5785899758338928, "eval_rewards/accuracies": 0.72817462682724, "eval_rewards/chosen": -1.0450665950775146, "eval_rewards/margins": 0.5720054507255554, "eval_rewards/rejected": -1.6170721054077148, "eval_runtime": 357.8971, "eval_samples_per_second": 5.588, "eval_steps_per_second": 0.176, "step": 3000 }, { "epoch": 3.15, "learning_rate": 1.5594039174921808e-07, "logits/chosen": -1.8807668685913086, "logits/rejected": -1.770019769668579, "logps/chosen": -436.2010192871094, "logps/rejected": -452.5880432128906, "loss": 0.5051, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.9551477432250977, "rewards/margins": 0.6700800061225891, "rewards/rejected": -1.6252275705337524, "step": 3010 }, { "epoch": 3.16, "learning_rate": 1.543872994572145e-07, "logits/chosen": -1.7073822021484375, "logits/rejected": -1.5819844007492065, "logps/chosen": -417.85791015625, "logps/rejected": -461.3323669433594, "loss": 0.4775, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.9456771612167358, "rewards/margins": 0.8424018621444702, "rewards/rejected": -1.7880792617797852, "step": 3020 }, { "epoch": 3.17, "learning_rate": 1.5283851746630173e-07, "logits/chosen": -1.8902000188827515, "logits/rejected": -1.8180053234100342, "logps/chosen": -436.1978454589844, "logps/rejected": -472.62860107421875, "loss": 0.5024, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.9585925936698914, "rewards/margins": 0.6844288110733032, "rewards/rejected": -1.6430212259292603, "step": 3030 }, { "epoch": 3.18, "learning_rate": 1.5129411559687632e-07, "logits/chosen": -1.8170684576034546, "logits/rejected": -1.6763471364974976, "logps/chosen": -446.4208068847656, "logps/rejected": -443.6482849121094, "loss": 0.5056, "rewards/accuracies": 0.75, "rewards/chosen": -1.0374691486358643, "rewards/margins": 0.6019953489303589, "rewards/rejected": -1.6394646167755127, "step": 3040 }, { "epoch": 3.19, "learning_rate": 1.4975416347187593e-07, "logits/chosen": -1.917802095413208, "logits/rejected": -1.7453248500823975, "logps/chosen": -473.4043884277344, "logps/rejected": -455.8473205566406, "loss": 0.4936, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.9156352281570435, "rewards/margins": 0.7537345886230469, "rewards/rejected": -1.6693699359893799, "step": 3050 }, { "epoch": 3.2, "learning_rate": 1.4821873051363955e-07, "logits/chosen": -1.8621914386749268, "logits/rejected": -1.749542474746704, "logps/chosen": -451.08282470703125, "logps/rejected": -497.8018493652344, "loss": 0.4788, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.9146499633789062, "rewards/margins": 0.7977242469787598, "rewards/rejected": -1.7123743295669556, "step": 3060 }, { "epoch": 3.21, "learning_rate": 1.4668788594077859e-07, "logits/chosen": -1.7952085733413696, "logits/rejected": -1.6200335025787354, "logps/chosen": -431.6758728027344, "logps/rejected": -463.505126953125, "loss": 0.4606, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.8738845586776733, "rewards/margins": 0.7860761880874634, "rewards/rejected": -1.6599609851837158, "step": 3070 }, { "epoch": 3.22, "learning_rate": 1.4516169876505596e-07, "logits/chosen": -1.8675405979156494, "logits/rejected": -1.6885595321655273, "logps/chosen": -459.55908203125, "logps/rejected": -451.66259765625, "loss": 0.5049, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.923184871673584, "rewards/margins": 0.6750983595848083, "rewards/rejected": -1.598283290863037, "step": 3080 }, { "epoch": 3.23, "learning_rate": 1.4364023778827538e-07, "logits/chosen": -1.8272289037704468, "logits/rejected": -1.7138440608978271, "logps/chosen": -444.3658752441406, "logps/rejected": -482.9459533691406, "loss": 0.4838, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.017207384109497, "rewards/margins": 0.6880172491073608, "rewards/rejected": -1.705224633216858, "step": 3090 }, { "epoch": 3.24, "learning_rate": 1.4212357159917942e-07, "logits/chosen": -1.8443920612335205, "logits/rejected": -1.7631704807281494, "logps/chosen": -409.96490478515625, "logps/rejected": -471.09027099609375, "loss": 0.4767, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.9304403066635132, "rewards/margins": 0.6487723588943481, "rewards/rejected": -1.5792125463485718, "step": 3100 }, { "epoch": 3.24, "eval_logits/chosen": -1.866295576095581, "eval_logits/rejected": -1.753827452659607, "eval_logps/chosen": -457.42584228515625, "eval_logps/rejected": -472.9984436035156, "eval_loss": 0.5770441293716431, "eval_rewards/accuracies": 0.726190447807312, "eval_rewards/chosen": -1.0806607007980347, "eval_rewards/margins": 0.5703620314598083, "eval_rewards/rejected": -1.6510227918624878, "eval_runtime": 378.5753, "eval_samples_per_second": 5.283, "eval_steps_per_second": 0.166, "step": 3100 }, { "epoch": 3.25, "learning_rate": 1.4061176857035765e-07, "logits/chosen": -1.8807693719863892, "logits/rejected": -1.8139241933822632, "logps/chosen": -469.0538635253906, "logps/rejected": -511.9051208496094, "loss": 0.4898, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9563783407211304, "rewards/margins": 0.724995493888855, "rewards/rejected": -1.6813738346099854, "step": 3110 }, { "epoch": 3.27, "learning_rate": 1.391048968551643e-07, "logits/chosen": -1.739332914352417, "logits/rejected": -1.6512079238891602, "logps/chosen": -407.3808288574219, "logps/rejected": -492.01348876953125, "loss": 0.4484, "rewards/accuracies": 0.84375, "rewards/chosen": -0.9379755258560181, "rewards/margins": 0.8809243440628052, "rewards/rejected": -1.8188997507095337, "step": 3120 }, { "epoch": 3.28, "learning_rate": 1.376030243846456e-07, "logits/chosen": -1.8204562664031982, "logits/rejected": -1.735701322555542, "logps/chosen": -419.04693603515625, "logps/rejected": -456.9537048339844, "loss": 0.499, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.9971426725387573, "rewards/margins": 0.8115785717964172, "rewards/rejected": -1.8087211847305298, "step": 3130 }, { "epoch": 3.29, "learning_rate": 1.3610621886447792e-07, "logits/chosen": -1.8077905178070068, "logits/rejected": -1.7940162420272827, "logps/chosen": -393.58673095703125, "logps/rejected": -454.60418701171875, "loss": 0.4829, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9757000207901001, "rewards/margins": 0.7894998788833618, "rewards/rejected": -1.765199899673462, "step": 3140 }, { "epoch": 3.3, "learning_rate": 1.3461454777191512e-07, "logits/chosen": -1.8403291702270508, "logits/rejected": -1.7041610479354858, "logps/chosen": -441.6806640625, "logps/rejected": -433.76751708984375, "loss": 0.4941, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.9537010192871094, "rewards/margins": 0.6518079042434692, "rewards/rejected": -1.6055090427398682, "step": 3150 }, { "epoch": 3.31, "learning_rate": 1.3312807835274676e-07, "logits/chosen": -1.8076324462890625, "logits/rejected": -1.7209094762802124, "logps/chosen": -429.984375, "logps/rejected": -467.8804626464844, "loss": 0.4794, "rewards/accuracies": 0.78125, "rewards/chosen": -1.0047121047973633, "rewards/margins": 0.705715000629425, "rewards/rejected": -1.710426926612854, "step": 3160 }, { "epoch": 3.32, "learning_rate": 1.3164687761826628e-07, "logits/chosen": -1.801439642906189, "logits/rejected": -1.6736797094345093, "logps/chosen": -430.87982177734375, "logps/rejected": -493.06671142578125, "loss": 0.47, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.0352692604064941, "rewards/margins": 0.8472986221313477, "rewards/rejected": -1.8825680017471313, "step": 3170 }, { "epoch": 3.33, "learning_rate": 1.3017101234225097e-07, "logits/chosen": -1.8457056283950806, "logits/rejected": -1.7224591970443726, "logps/chosen": -449.48101806640625, "logps/rejected": -476.2332458496094, "loss": 0.4734, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9599472284317017, "rewards/margins": 0.7956485748291016, "rewards/rejected": -1.7555955648422241, "step": 3180 }, { "epoch": 3.34, "learning_rate": 1.2870054905795083e-07, "logits/chosen": -1.8150501251220703, "logits/rejected": -1.7427335977554321, "logps/chosen": -452.417724609375, "logps/rejected": -494.1238708496094, "loss": 0.4863, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.044499158859253, "rewards/margins": 0.6634548306465149, "rewards/rejected": -1.7079538106918335, "step": 3190 }, { "epoch": 3.35, "learning_rate": 1.272355540550893e-07, "logits/chosen": -1.6971858739852905, "logits/rejected": -1.6030826568603516, "logps/chosen": -432.5633850097656, "logps/rejected": -473.17950439453125, "loss": 0.4794, "rewards/accuracies": 0.78125, "rewards/chosen": -1.0610132217407227, "rewards/margins": 0.7416720390319824, "rewards/rejected": -1.8026853799819946, "step": 3200 }, { "epoch": 3.35, "eval_logits/chosen": -1.8515363931655884, "eval_logits/rejected": -1.738411784172058, "eval_logps/chosen": -460.2549743652344, "eval_logps/rejected": -476.874267578125, "eval_loss": 0.5789009928703308, "eval_rewards/accuracies": 0.726190447807312, "eval_rewards/chosen": -1.108952283859253, "eval_rewards/margins": 0.5808290839195251, "eval_rewards/rejected": -1.6897813081741333, "eval_runtime": 388.0721, "eval_samples_per_second": 5.154, "eval_steps_per_second": 0.162, "step": 3200 }, { "epoch": 3.36, "learning_rate": 1.2577609337687545e-07, "logits/chosen": -1.7929632663726807, "logits/rejected": -1.6986091136932373, "logps/chosen": -415.9329528808594, "logps/rejected": -493.5342712402344, "loss": 0.4843, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -1.0511469841003418, "rewards/margins": 0.8548563718795776, "rewards/rejected": -1.9060032367706299, "step": 3210 }, { "epoch": 3.37, "learning_rate": 1.2432223281702616e-07, "logits/chosen": -1.8026511669158936, "logits/rejected": -1.7878223657608032, "logps/chosen": -420.894287109375, "logps/rejected": -473.5709533691406, "loss": 0.4781, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.0878639221191406, "rewards/margins": 0.5462750196456909, "rewards/rejected": -1.634138822555542, "step": 3220 }, { "epoch": 3.38, "learning_rate": 1.228740379168004e-07, "logits/chosen": -1.7239850759506226, "logits/rejected": -1.6399204730987549, "logps/chosen": -473.14208984375, "logps/rejected": -485.84442138671875, "loss": 0.4803, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0646392107009888, "rewards/margins": 0.8183633685112, "rewards/rejected": -1.8830026388168335, "step": 3230 }, { "epoch": 3.39, "learning_rate": 1.2143157396204415e-07, "logits/chosen": -1.7619224786758423, "logits/rejected": -1.7232242822647095, "logps/chosen": -421.8963928222656, "logps/rejected": -484.1393127441406, "loss": 0.5063, "rewards/accuracies": 0.75, "rewards/chosen": -1.0958998203277588, "rewards/margins": 0.6340087652206421, "rewards/rejected": -1.7299085855484009, "step": 3240 }, { "epoch": 3.4, "learning_rate": 1.199949059802478e-07, "logits/chosen": -1.8530910015106201, "logits/rejected": -1.7129993438720703, "logps/chosen": -473.8306579589844, "logps/rejected": -485.2450256347656, "loss": 0.4622, "rewards/accuracies": 0.84375, "rewards/chosen": -0.9937135577201843, "rewards/margins": 0.8251352310180664, "rewards/rejected": -1.8188488483428955, "step": 3250 }, { "epoch": 3.41, "learning_rate": 1.1856409873761428e-07, "logits/chosen": -1.750261902809143, "logits/rejected": -1.646456003189087, "logps/chosen": -430.5122985839844, "logps/rejected": -435.7897033691406, "loss": 0.4918, "rewards/accuracies": 0.78125, "rewards/chosen": -0.9967744946479797, "rewards/margins": 0.6008384227752686, "rewards/rejected": -1.5976128578186035, "step": 3260 }, { "epoch": 3.42, "learning_rate": 1.1713921673613961e-07, "logits/chosen": -1.855929970741272, "logits/rejected": -1.7337143421173096, "logps/chosen": -441.72998046875, "logps/rejected": -476.8902893066406, "loss": 0.4857, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.0006901025772095, "rewards/margins": 0.8095808029174805, "rewards/rejected": -1.81027090549469, "step": 3270 }, { "epoch": 3.43, "learning_rate": 1.1572032421070452e-07, "logits/chosen": -1.7859611511230469, "logits/rejected": -1.5934228897094727, "logps/chosen": -472.6265563964844, "logps/rejected": -509.2264709472656, "loss": 0.4612, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0804340839385986, "rewards/margins": 0.8621044158935547, "rewards/rejected": -1.9425384998321533, "step": 3280 }, { "epoch": 3.44, "learning_rate": 1.1430748512617974e-07, "logits/chosen": -1.9406483173370361, "logits/rejected": -1.8423467874526978, "logps/chosen": -465.8262634277344, "logps/rejected": -487.7626037597656, "loss": 0.4756, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.089054822921753, "rewards/margins": 0.6790697574615479, "rewards/rejected": -1.7681243419647217, "step": 3290 }, { "epoch": 3.46, "learning_rate": 1.1290076317454142e-07, "logits/chosen": -1.7945177555084229, "logits/rejected": -1.6354175806045532, "logps/chosen": -451.65625, "logps/rejected": -489.162841796875, "loss": 0.4784, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.0753741264343262, "rewards/margins": 0.7434049844741821, "rewards/rejected": -1.8187793493270874, "step": 3300 }, { "epoch": 3.46, "eval_logits/chosen": -1.844208002090454, "eval_logits/rejected": -1.731345772743225, "eval_logps/chosen": -468.6473388671875, "eval_logps/rejected": -486.3179626464844, "eval_loss": 0.573898196220398, "eval_rewards/accuracies": 0.716269850730896, "eval_rewards/chosen": -1.1928762197494507, "eval_rewards/margins": 0.5913423895835876, "eval_rewards/rejected": -1.7842185497283936, "eval_runtime": 214.6932, "eval_samples_per_second": 9.316, "eval_steps_per_second": 0.293, "step": 3300 }, { "epoch": 3.47, "learning_rate": 1.115002217720001e-07, "logits/chosen": -1.7635328769683838, "logits/rejected": -1.6847097873687744, "logps/chosen": -431.44573974609375, "logps/rejected": -467.83251953125, "loss": 0.4942, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.0202503204345703, "rewards/margins": 0.7337538003921509, "rewards/rejected": -1.7540042400360107, "step": 3310 }, { "epoch": 3.48, "learning_rate": 1.1010592405614221e-07, "logits/chosen": -1.7923495769500732, "logits/rejected": -1.7173646688461304, "logps/chosen": -435.59906005859375, "logps/rejected": -487.1529235839844, "loss": 0.4797, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.1358600854873657, "rewards/margins": 0.7092723846435547, "rewards/rejected": -1.8451322317123413, "step": 3320 }, { "epoch": 3.49, "learning_rate": 1.087179328830834e-07, "logits/chosen": -1.7255041599273682, "logits/rejected": -1.6920020580291748, "logps/chosen": -379.7822265625, "logps/rejected": -449.1914978027344, "loss": 0.4954, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.0361435413360596, "rewards/margins": 0.6081300973892212, "rewards/rejected": -1.6442735195159912, "step": 3330 }, { "epoch": 3.5, "learning_rate": 1.0733631082463517e-07, "logits/chosen": -1.8408622741699219, "logits/rejected": -1.711627721786499, "logps/chosen": -470.7630310058594, "logps/rejected": -467.80767822265625, "loss": 0.4856, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.1355063915252686, "rewards/margins": 0.6574573516845703, "rewards/rejected": -1.7929637432098389, "step": 3340 }, { "epoch": 3.51, "learning_rate": 1.0596112016548372e-07, "logits/chosen": -1.770371675491333, "logits/rejected": -1.734692931175232, "logps/chosen": -418.6500549316406, "logps/rejected": -466.70330810546875, "loss": 0.4953, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.0982646942138672, "rewards/margins": 0.5657511949539185, "rewards/rejected": -1.664015769958496, "step": 3350 }, { "epoch": 3.52, "learning_rate": 1.0459242290038259e-07, "logits/chosen": -1.820656180381775, "logits/rejected": -1.7680647373199463, "logps/chosen": -428.38922119140625, "logps/rejected": -440.9400939941406, "loss": 0.5059, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.0621531009674072, "rewards/margins": 0.5993833541870117, "rewards/rejected": -1.661536455154419, "step": 3360 }, { "epoch": 3.53, "learning_rate": 1.0323028073135756e-07, "logits/chosen": -1.7957019805908203, "logits/rejected": -1.682960867881775, "logps/chosen": -457.93780517578125, "logps/rejected": -469.3650817871094, "loss": 0.4787, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.8964568972587585, "rewards/margins": 0.7799767255783081, "rewards/rejected": -1.676433801651001, "step": 3370 }, { "epoch": 3.54, "learning_rate": 1.0187475506492526e-07, "logits/chosen": -1.8339675664901733, "logits/rejected": -1.712244987487793, "logps/chosen": -443.6895446777344, "logps/rejected": -467.30810546875, "loss": 0.487, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -1.044893503189087, "rewards/margins": 0.7343738079071045, "rewards/rejected": -1.7792673110961914, "step": 3380 }, { "epoch": 3.55, "learning_rate": 1.0052590700932445e-07, "logits/chosen": -1.8773367404937744, "logits/rejected": -1.78768789768219, "logps/chosen": -436.32806396484375, "logps/rejected": -466.9984436035156, "loss": 0.4934, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.0362013578414917, "rewards/margins": 0.7617989182472229, "rewards/rejected": -1.7980003356933594, "step": 3390 }, { "epoch": 3.56, "learning_rate": 9.918379737176207e-08, "logits/chosen": -1.7708876132965088, "logits/rejected": -1.7042875289916992, "logps/chosen": -430.687255859375, "logps/rejected": -476.30035400390625, "loss": 0.4797, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0874220132827759, "rewards/margins": 0.6192290782928467, "rewards/rejected": -1.7066510915756226, "step": 3400 }, { "epoch": 3.56, "eval_logits/chosen": -1.846415400505066, "eval_logits/rejected": -1.7339593172073364, "eval_logps/chosen": -464.2335510253906, "eval_logps/rejected": -480.9566345214844, "eval_loss": 0.5754001140594482, "eval_rewards/accuracies": 0.7202380895614624, "eval_rewards/chosen": -1.1487380266189575, "eval_rewards/margins": 0.5818668603897095, "eval_rewards/rejected": -1.730604887008667, "eval_runtime": 230.6525, "eval_samples_per_second": 8.671, "eval_steps_per_second": 0.273, "step": 3400 }, { "epoch": 3.57, "learning_rate": 9.78484866556713e-08, "logits/chosen": -1.7351133823394775, "logits/rejected": -1.6162784099578857, "logps/chosen": -431.660400390625, "logps/rejected": -473.376220703125, "loss": 0.4657, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.9811315536499023, "rewards/margins": 0.7743980288505554, "rewards/rejected": -1.755529761314392, "step": 3410 }, { "epoch": 3.58, "learning_rate": 9.652003505798397e-08, "logits/chosen": -1.7996597290039062, "logits/rejected": -1.6331745386123657, "logps/chosen": -447.5265197753906, "logps/rejected": -471.54364013671875, "loss": 0.4628, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.9811578989028931, "rewards/margins": 0.927898108959198, "rewards/rejected": -1.9090559482574463, "step": 3420 }, { "epoch": 3.59, "learning_rate": 9.519850246641739e-08, "logits/chosen": -1.8197190761566162, "logits/rejected": -1.692530632019043, "logps/chosen": -466.42608642578125, "logps/rejected": -493.58819580078125, "loss": 0.5109, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.1230065822601318, "rewards/margins": 0.6617223024368286, "rewards/rejected": -1.784728765487671, "step": 3430 }, { "epoch": 3.6, "learning_rate": 9.38839484567741e-08, "logits/chosen": -1.8353790044784546, "logits/rejected": -1.7911113500595093, "logps/chosen": -457.2601013183594, "logps/rejected": -514.238037109375, "loss": 0.4777, "rewards/accuracies": 0.75, "rewards/chosen": -1.0778439044952393, "rewards/margins": 0.7159544825553894, "rewards/rejected": -1.7937984466552734, "step": 3440 }, { "epoch": 3.61, "learning_rate": 9.25764322902564e-08, "logits/chosen": -1.8523584604263306, "logits/rejected": -1.746606469154358, "logps/chosen": -434.7613830566406, "logps/rejected": -508.73553466796875, "loss": 0.4831, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.9743539094924927, "rewards/margins": 0.7968435883522034, "rewards/rejected": -1.7711975574493408, "step": 3450 }, { "epoch": 3.62, "learning_rate": 9.127601291079436e-08, "logits/chosen": -1.73825204372406, "logits/rejected": -1.714342713356018, "logps/chosen": -428.89892578125, "logps/rejected": -511.9532775878906, "loss": 0.4928, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.1295487880706787, "rewards/margins": 0.6823315024375916, "rewards/rejected": -1.811880350112915, "step": 3460 }, { "epoch": 3.63, "learning_rate": 8.998274894238953e-08, "logits/chosen": -1.8188636302947998, "logits/rejected": -1.7010266780853271, "logps/chosen": -440.44732666015625, "logps/rejected": -499.5472106933594, "loss": 0.4817, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.0014610290527344, "rewards/margins": 0.7511878609657288, "rewards/rejected": -1.7526487112045288, "step": 3470 }, { "epoch": 3.64, "learning_rate": 8.869669868647084e-08, "logits/chosen": -1.884778380393982, "logits/rejected": -1.7300710678100586, "logps/chosen": -481.62237548828125, "logps/rejected": -493.79840087890625, "loss": 0.4825, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0563971996307373, "rewards/margins": 0.7872760891914368, "rewards/rejected": -1.8436731100082397, "step": 3480 }, { "epoch": 3.65, "learning_rate": 8.741792011926736e-08, "logits/chosen": -1.8345582485198975, "logits/rejected": -1.776908278465271, "logps/chosen": -457.0000915527344, "logps/rejected": -501.9378967285156, "loss": 0.5381, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.134314775466919, "rewards/margins": 0.6081364750862122, "rewards/rejected": -1.7424513101577759, "step": 3490 }, { "epoch": 3.66, "learning_rate": 8.614647088919424e-08, "logits/chosen": -1.7891185283660889, "logits/rejected": -1.7254148721694946, "logps/chosen": -481.73486328125, "logps/rejected": -502.29901123046875, "loss": 0.4967, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.0791096687316895, "rewards/margins": 0.6187223792076111, "rewards/rejected": -1.6978321075439453, "step": 3500 }, { "epoch": 3.66, "eval_logits/chosen": -1.845801830291748, "eval_logits/rejected": -1.7331349849700928, "eval_logps/chosen": -462.4029846191406, "eval_logps/rejected": -478.66900634765625, "eval_loss": 0.576257586479187, "eval_rewards/accuracies": 0.72817462682724, "eval_rewards/chosen": -1.1304326057434082, "eval_rewards/margins": 0.5772957801818848, "eval_rewards/rejected": -1.7077282667160034, "eval_runtime": 232.7718, "eval_samples_per_second": 8.592, "eval_steps_per_second": 0.271, "step": 3500 }, { "epoch": 3.68, "learning_rate": 8.488240831425395e-08, "logits/chosen": -1.6944081783294678, "logits/rejected": -1.602821946144104, "logps/chosen": -444.11724853515625, "logps/rejected": -483.67230224609375, "loss": 0.4754, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.1205617189407349, "rewards/margins": 0.7284508943557739, "rewards/rejected": -1.8490123748779297, "step": 3510 }, { "epoch": 3.69, "learning_rate": 8.362578937945231e-08, "logits/chosen": -1.7747758626937866, "logits/rejected": -1.6951490640640259, "logps/chosen": -469.2557678222656, "logps/rejected": -511.41839599609375, "loss": 0.4801, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.9889723658561707, "rewards/margins": 0.733228325843811, "rewards/rejected": -1.7222007513046265, "step": 3520 }, { "epoch": 3.7, "learning_rate": 8.237667073422943e-08, "logits/chosen": -1.801891565322876, "logits/rejected": -1.6538407802581787, "logps/chosen": -435.76861572265625, "logps/rejected": -445.46368408203125, "loss": 0.4818, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.061366319656372, "rewards/margins": 0.6765199899673462, "rewards/rejected": -1.7378864288330078, "step": 3530 }, { "epoch": 3.71, "learning_rate": 8.113510868990626e-08, "logits/chosen": -1.8102309703826904, "logits/rejected": -1.761523962020874, "logps/chosen": -455.29010009765625, "logps/rejected": -522.002197265625, "loss": 0.4912, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0575816631317139, "rewards/margins": 0.6482858061790466, "rewards/rejected": -1.7058674097061157, "step": 3540 }, { "epoch": 3.72, "learning_rate": 7.990115921714571e-08, "logits/chosen": -1.7839081287384033, "logits/rejected": -1.6682395935058594, "logps/chosen": -429.5260314941406, "logps/rejected": -455.45111083984375, "loss": 0.5005, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.0066113471984863, "rewards/margins": 0.7589043378829956, "rewards/rejected": -1.765515685081482, "step": 3550 }, { "epoch": 3.73, "learning_rate": 7.867487794342966e-08, "logits/chosen": -1.7547132968902588, "logits/rejected": -1.6351432800292969, "logps/chosen": -486.7994079589844, "logps/rejected": -483.152099609375, "loss": 0.4809, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.0975134372711182, "rewards/margins": 0.6140449643135071, "rewards/rejected": -1.7115581035614014, "step": 3560 }, { "epoch": 3.74, "learning_rate": 7.745632015055079e-08, "logits/chosen": -1.7974326610565186, "logits/rejected": -1.7494417428970337, "logps/chosen": -408.68145751953125, "logps/rejected": -484.828125, "loss": 0.4701, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.9123633503913879, "rewards/margins": 0.703347384929657, "rewards/rejected": -1.6157108545303345, "step": 3570 }, { "epoch": 3.75, "learning_rate": 7.624554077212128e-08, "logits/chosen": -1.7972793579101562, "logits/rejected": -1.7015396356582642, "logps/chosen": -466.3648376464844, "logps/rejected": -488.1949157714844, "loss": 0.4937, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9682089686393738, "rewards/margins": 0.7773339152336121, "rewards/rejected": -1.7455428838729858, "step": 3580 }, { "epoch": 3.76, "learning_rate": 7.504259439109534e-08, "logits/chosen": -1.6706613302230835, "logits/rejected": -1.5966769456863403, "logps/chosen": -427.3377990722656, "logps/rejected": -471.311279296875, "loss": 0.5085, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.0817948579788208, "rewards/margins": 0.7141741514205933, "rewards/rejected": -1.795969009399414, "step": 3590 }, { "epoch": 3.77, "learning_rate": 7.384753523730935e-08, "logits/chosen": -1.8219425678253174, "logits/rejected": -1.7524003982543945, "logps/chosen": -445.67999267578125, "logps/rejected": -502.49163818359375, "loss": 0.4747, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9601839184761047, "rewards/margins": 0.7520685195922852, "rewards/rejected": -1.7122526168823242, "step": 3600 }, { "epoch": 3.77, "eval_logits/chosen": -1.8401782512664795, "eval_logits/rejected": -1.7268399000167847, "eval_logps/chosen": -462.3710021972656, "eval_logps/rejected": -479.5740661621094, "eval_loss": 0.5767081379890442, "eval_rewards/accuracies": 0.726190447807312, "eval_rewards/chosen": -1.1301121711730957, "eval_rewards/margins": 0.5866668820381165, "eval_rewards/rejected": -1.7167788743972778, "eval_runtime": 227.946, "eval_samples_per_second": 8.774, "eval_steps_per_second": 0.276, "step": 3600 }, { "epoch": 3.78, "learning_rate": 7.266041718503671e-08, "logits/chosen": -1.7200260162353516, "logits/rejected": -1.6315845251083374, "logps/chosen": -432.0589294433594, "logps/rejected": -461.25079345703125, "loss": 0.5043, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.9935817718505859, "rewards/margins": 0.6516721844673157, "rewards/rejected": -1.6452537775039673, "step": 3610 }, { "epoch": 3.79, "learning_rate": 7.148129375055936e-08, "logits/chosen": -1.8993425369262695, "logits/rejected": -1.69882071018219, "logps/chosen": -478.901123046875, "logps/rejected": -489.272216796875, "loss": 0.4855, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.928783118724823, "rewards/margins": 0.854119598865509, "rewards/rejected": -1.782902479171753, "step": 3620 }, { "epoch": 3.8, "learning_rate": 7.031021808975518e-08, "logits/chosen": -1.9520289897918701, "logits/rejected": -1.785881757736206, "logps/chosen": -485.9696350097656, "logps/rejected": -474.34893798828125, "loss": 0.4884, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.9730976819992065, "rewards/margins": 0.8256509900093079, "rewards/rejected": -1.7987486124038696, "step": 3630 }, { "epoch": 3.81, "learning_rate": 6.914724299570127e-08, "logits/chosen": -1.9349133968353271, "logits/rejected": -1.8336073160171509, "logps/chosen": -470.2247619628906, "logps/rejected": -479.83624267578125, "loss": 0.5086, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.0859735012054443, "rewards/margins": 0.6873432397842407, "rewards/rejected": -1.7733169794082642, "step": 3640 }, { "epoch": 3.82, "learning_rate": 6.799242089629497e-08, "logits/chosen": -1.687898874282837, "logits/rejected": -1.6447770595550537, "logps/chosen": -406.9367370605469, "logps/rejected": -469.34552001953125, "loss": 0.4775, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.0913324356079102, "rewards/margins": 0.66700279712677, "rewards/rejected": -1.7583353519439697, "step": 3650 }, { "epoch": 3.83, "learning_rate": 6.684580385188917e-08, "logits/chosen": -1.778376817703247, "logits/rejected": -1.7570167779922485, "logps/chosen": -451.86871337890625, "logps/rejected": -502.09478759765625, "loss": 0.4809, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.0968141555786133, "rewards/margins": 0.6900449991226196, "rewards/rejected": -1.786859154701233, "step": 3660 }, { "epoch": 3.84, "learning_rate": 6.570744355294642e-08, "logits/chosen": -1.856090784072876, "logits/rejected": -1.7611362934112549, "logps/chosen": -461.3770446777344, "logps/rejected": -473.8399963378906, "loss": 0.5269, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.1256920099258423, "rewards/margins": 0.5923901796340942, "rewards/rejected": -1.7180821895599365, "step": 3670 }, { "epoch": 3.85, "learning_rate": 6.45773913177077e-08, "logits/chosen": -1.7539339065551758, "logits/rejected": -1.698042869567871, "logps/chosen": -442.38128662109375, "logps/rejected": -486.7842712402344, "loss": 0.468, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0839567184448242, "rewards/margins": 0.7219823002815247, "rewards/rejected": -1.805938959121704, "step": 3680 }, { "epoch": 3.86, "learning_rate": 6.345569808988019e-08, "logits/chosen": -1.800353765487671, "logits/rejected": -1.6878328323364258, "logps/chosen": -438.8720703125, "logps/rejected": -452.9892578125, "loss": 0.5116, "rewards/accuracies": 0.75, "rewards/chosen": -1.0886685848236084, "rewards/margins": 0.643202006816864, "rewards/rejected": -1.7318706512451172, "step": 3690 }, { "epoch": 3.87, "learning_rate": 6.23424144363393e-08, "logits/chosen": -1.8801469802856445, "logits/rejected": -1.7438074350357056, "logps/chosen": -430.7373962402344, "logps/rejected": -452.6865234375, "loss": 0.4895, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0514805316925049, "rewards/margins": 0.6827796697616577, "rewards/rejected": -1.7342602014541626, "step": 3700 }, { "epoch": 3.87, "eval_logits/chosen": -1.84304678440094, "eval_logits/rejected": -1.7301536798477173, "eval_logps/chosen": -463.2915344238281, "eval_logps/rejected": -479.66912841796875, "eval_loss": 0.5747166872024536, "eval_rewards/accuracies": 0.7202380895614624, "eval_rewards/chosen": -1.1393183469772339, "eval_rewards/margins": 0.5784114003181458, "eval_rewards/rejected": -1.7177296876907349, "eval_runtime": 226.3769, "eval_samples_per_second": 8.835, "eval_steps_per_second": 0.278, "step": 3700 }, { "epoch": 3.88, "learning_rate": 6.123759054485015e-08, "logits/chosen": -1.8235986232757568, "logits/rejected": -1.6753406524658203, "logps/chosen": -486.02740478515625, "logps/rejected": -501.69476318359375, "loss": 0.4715, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.9594398736953735, "rewards/margins": 0.8951163291931152, "rewards/rejected": -1.8545563220977783, "step": 3710 }, { "epoch": 3.89, "learning_rate": 6.014127622180452e-08, "logits/chosen": -1.8062944412231445, "logits/rejected": -1.7111324071884155, "logps/chosen": -451.428466796875, "logps/rejected": -489.58074951171875, "loss": 0.4911, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.9969078302383423, "rewards/margins": 0.6812020540237427, "rewards/rejected": -1.6781097650527954, "step": 3720 }, { "epoch": 3.91, "learning_rate": 5.90535208899757e-08, "logits/chosen": -1.789720892906189, "logits/rejected": -1.6432859897613525, "logps/chosen": -454.414794921875, "logps/rejected": -467.2537536621094, "loss": 0.4889, "rewards/accuracies": 0.78125, "rewards/chosen": -1.1260120868682861, "rewards/margins": 0.6561237573623657, "rewards/rejected": -1.7821362018585205, "step": 3730 }, { "epoch": 3.92, "learning_rate": 5.797437358629051e-08, "logits/chosen": -1.8378006219863892, "logits/rejected": -1.74234139919281, "logps/chosen": -448.30377197265625, "logps/rejected": -492.735595703125, "loss": 0.482, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.9833332896232605, "rewards/margins": 0.8310686349868774, "rewards/rejected": -1.814401626586914, "step": 3740 }, { "epoch": 3.93, "learning_rate": 5.6903882959618317e-08, "logits/chosen": -1.8451528549194336, "logits/rejected": -1.6656252145767212, "logps/chosen": -457.91973876953125, "logps/rejected": -448.98974609375, "loss": 0.4921, "rewards/accuracies": 0.78125, "rewards/chosen": -0.9729013442993164, "rewards/margins": 0.7378336191177368, "rewards/rejected": -1.7107349634170532, "step": 3750 }, { "epoch": 3.94, "learning_rate": 5.584209726857872e-08, "logits/chosen": -1.8105888366699219, "logits/rejected": -1.7128746509552002, "logps/chosen": -460.19171142578125, "logps/rejected": -504.76812744140625, "loss": 0.4847, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -1.0890675783157349, "rewards/margins": 0.790399432182312, "rewards/rejected": -1.8794670104980469, "step": 3760 }, { "epoch": 3.95, "learning_rate": 5.478906437936501e-08, "logits/chosen": -1.7644094228744507, "logits/rejected": -1.700378179550171, "logps/chosen": -456.9208984375, "logps/rejected": -476.3636169433594, "loss": 0.4955, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.0362350940704346, "rewards/margins": 0.5969945192337036, "rewards/rejected": -1.6332294940948486, "step": 3770 }, { "epoch": 3.96, "learning_rate": 5.374483176358696e-08, "logits/chosen": -1.7678935527801514, "logits/rejected": -1.7221410274505615, "logps/chosen": -440.03759765625, "logps/rejected": -522.8914184570312, "loss": 0.4722, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -1.046316385269165, "rewards/margins": 0.8649559020996094, "rewards/rejected": -1.9112722873687744, "step": 3780 }, { "epoch": 3.97, "learning_rate": 5.2709446496130685e-08, "logits/chosen": -1.7751652002334595, "logits/rejected": -1.7962526082992554, "logps/chosen": -424.12384033203125, "logps/rejected": -523.4373779296875, "loss": 0.4674, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.9684499502182007, "rewards/margins": 0.8860515356063843, "rewards/rejected": -1.854501485824585, "step": 3790 }, { "epoch": 3.98, "learning_rate": 5.1682955253036286e-08, "logits/chosen": -1.750946283340454, "logits/rejected": -1.570204734802246, "logps/chosen": -469.3467712402344, "logps/rejected": -441.6266174316406, "loss": 0.5118, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -1.075725793838501, "rewards/margins": 0.7432368397712708, "rewards/rejected": -1.8189626932144165, "step": 3800 }, { "epoch": 3.98, "eval_logits/chosen": -1.841734766960144, "eval_logits/rejected": -1.7281790971755981, "eval_logps/chosen": -464.1390075683594, "eval_logps/rejected": -481.311767578125, "eval_loss": 0.5743067860603333, "eval_rewards/accuracies": 0.726190447807312, "eval_rewards/chosen": -1.1477924585342407, "eval_rewards/margins": 0.5863636136054993, "eval_rewards/rejected": -1.7341560125350952, "eval_runtime": 249.2328, "eval_samples_per_second": 8.025, "eval_steps_per_second": 0.253, "step": 3800 }, { "epoch": 3.99, "learning_rate": 5.066540430939384e-08, "logits/chosen": -1.8474823236465454, "logits/rejected": -1.6917006969451904, "logps/chosen": -473.05572509765625, "logps/rejected": -487.7589416503906, "loss": 0.4862, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.088275671005249, "rewards/margins": 0.6751676201820374, "rewards/rejected": -1.7634432315826416, "step": 3810 }, { "epoch": 4.0, "learning_rate": 4.965683953725705e-08, "logits/chosen": -1.8910309076309204, "logits/rejected": -1.8070173263549805, "logps/chosen": -462.40985107421875, "logps/rejected": -494.81427001953125, "loss": 0.5017, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.032575011253357, "rewards/margins": 0.6556354761123657, "rewards/rejected": -1.6882108449935913, "step": 3820 }, { "epoch": 4.01, "learning_rate": 4.8657306403575546e-08, "logits/chosen": -1.9338630437850952, "logits/rejected": -1.8257001638412476, "logps/chosen": -471.525390625, "logps/rejected": -491.8013610839844, "loss": 0.4745, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.9843519330024719, "rewards/margins": 0.7315724492073059, "rewards/rejected": -1.7159245014190674, "step": 3830 }, { "epoch": 4.02, "learning_rate": 4.766684996814505e-08, "logits/chosen": -1.7188827991485596, "logits/rejected": -1.670910120010376, "logps/chosen": -475.8091735839844, "logps/rejected": -488.88433837890625, "loss": 0.4848, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.1858031749725342, "rewards/margins": 0.6162821054458618, "rewards/rejected": -1.802085280418396, "step": 3840 }, { "epoch": 4.03, "learning_rate": 4.6685514881576184e-08, "logits/chosen": -1.8002866506576538, "logits/rejected": -1.6799087524414062, "logps/chosen": -458.14410400390625, "logps/rejected": -469.3955078125, "loss": 0.4868, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -1.0560219287872314, "rewards/margins": 0.7603785395622253, "rewards/rejected": -1.8164005279541016, "step": 3850 }, { "epoch": 4.04, "learning_rate": 4.5713345383281225e-08, "logits/chosen": -1.824496865272522, "logits/rejected": -1.7280305624008179, "logps/chosen": -446.33746337890625, "logps/rejected": -480.5821838378906, "loss": 0.4587, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -1.101201057434082, "rewards/margins": 0.7129907011985779, "rewards/rejected": -1.8141918182373047, "step": 3860 }, { "epoch": 4.05, "learning_rate": 4.475038529948036e-08, "logits/chosen": -1.7647641897201538, "logits/rejected": -1.7041082382202148, "logps/chosen": -423.53021240234375, "logps/rejected": -502.34637451171875, "loss": 0.4692, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.9148675799369812, "rewards/margins": 0.9240180253982544, "rewards/rejected": -1.8388856649398804, "step": 3870 }, { "epoch": 4.06, "learning_rate": 4.379667804122531e-08, "logits/chosen": -1.758404016494751, "logits/rejected": -1.624509572982788, "logps/chosen": -421.4674377441406, "logps/rejected": -452.26361083984375, "loss": 0.4571, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -1.049045443534851, "rewards/margins": 0.7535545825958252, "rewards/rejected": -1.8025999069213867, "step": 3880 }, { "epoch": 4.07, "learning_rate": 4.285226660244273e-08, "logits/chosen": -1.747127890586853, "logits/rejected": -1.6422996520996094, "logps/chosen": -437.3968200683594, "logps/rejected": -486.3650817871094, "loss": 0.491, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0798274278640747, "rewards/margins": 0.7409617900848389, "rewards/rejected": -1.820789098739624, "step": 3890 }, { "epoch": 4.08, "learning_rate": 4.191719355799595e-08, "logits/chosen": -1.7357877492904663, "logits/rejected": -1.6818689107894897, "logps/chosen": -445.1678161621094, "logps/rejected": -490.9187927246094, "loss": 0.5007, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.124782919883728, "rewards/margins": 0.6258308291435242, "rewards/rejected": -1.750613808631897, "step": 3900 }, { "epoch": 4.08, "eval_logits/chosen": -1.8403288125991821, "eval_logits/rejected": -1.7268848419189453, "eval_logps/chosen": -462.8506774902344, "eval_logps/rejected": -480.0435791015625, "eval_loss": 0.5753123760223389, "eval_rewards/accuracies": 0.72817462682724, "eval_rewards/chosen": -1.1349091529846191, "eval_rewards/margins": 0.5865655541419983, "eval_rewards/rejected": -1.7214747667312622, "eval_runtime": 224.0163, "eval_samples_per_second": 8.928, "eval_steps_per_second": 0.281, "step": 3900 }, { "epoch": 4.09, "learning_rate": 4.0991501061765574e-08, "logits/chosen": -1.8390766382217407, "logits/rejected": -1.7180551290512085, "logps/chosen": -449.53582763671875, "logps/rejected": -492.67864990234375, "loss": 0.4856, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.0572012662887573, "rewards/margins": 0.7417745590209961, "rewards/rejected": -1.798975944519043, "step": 3910 }, { "epoch": 4.1, "learning_rate": 4.007523084474929e-08, "logits/chosen": -1.796332597732544, "logits/rejected": -1.685544729232788, "logps/chosen": -465.2630920410156, "logps/rejected": -485.39776611328125, "loss": 0.4619, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.0321013927459717, "rewards/margins": 0.8061541318893433, "rewards/rejected": -1.8382556438446045, "step": 3920 }, { "epoch": 4.11, "learning_rate": 3.916842421318015e-08, "logits/chosen": -1.7621917724609375, "logits/rejected": -1.6985727548599243, "logps/chosen": -416.25469970703125, "logps/rejected": -488.31103515625, "loss": 0.4732, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0770134925842285, "rewards/margins": 0.7757894396781921, "rewards/rejected": -1.8528029918670654, "step": 3930 }, { "epoch": 4.13, "learning_rate": 3.8271122046665326e-08, "logits/chosen": -1.7745786905288696, "logits/rejected": -1.7049392461776733, "logps/chosen": -420.1312561035156, "logps/rejected": -456.5462951660156, "loss": 0.4799, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0381975173950195, "rewards/margins": 0.7507133483886719, "rewards/rejected": -1.7889106273651123, "step": 3940 }, { "epoch": 4.14, "learning_rate": 3.738336479634227e-08, "logits/chosen": -1.744370460510254, "logits/rejected": -1.7423713207244873, "logps/chosen": -401.23883056640625, "logps/rejected": -456.70623779296875, "loss": 0.4917, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.081215262413025, "rewards/margins": 0.6640897989273071, "rewards/rejected": -1.745304822921753, "step": 3950 }, { "epoch": 4.15, "learning_rate": 3.650519248305583e-08, "logits/chosen": -1.8554394245147705, "logits/rejected": -1.7356094121932983, "logps/chosen": -440.2884216308594, "logps/rejected": -496.92877197265625, "loss": 0.4588, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -1.0013624429702759, "rewards/margins": 0.8840063810348511, "rewards/rejected": -1.8853687047958374, "step": 3960 }, { "epoch": 4.16, "learning_rate": 3.5636644695553554e-08, "logits/chosen": -1.8372949361801147, "logits/rejected": -1.7880789041519165, "logps/chosen": -425.9288635253906, "logps/rejected": -475.31256103515625, "loss": 0.4733, "rewards/accuracies": 0.78125, "rewards/chosen": -1.003899335861206, "rewards/margins": 0.763668417930603, "rewards/rejected": -1.7675678730010986, "step": 3970 }, { "epoch": 4.17, "learning_rate": 3.477776058870166e-08, "logits/chosen": -1.7965351343154907, "logits/rejected": -1.646406888961792, "logps/chosen": -440.19598388671875, "logps/rejected": -478.03033447265625, "loss": 0.4435, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -1.0087398290634155, "rewards/margins": 0.8503010869026184, "rewards/rejected": -1.8590409755706787, "step": 3980 }, { "epoch": 4.18, "learning_rate": 3.392857888171904e-08, "logits/chosen": -1.8338630199432373, "logits/rejected": -1.6785008907318115, "logps/chosen": -429.9710998535156, "logps/rejected": -448.28094482421875, "loss": 0.4643, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9645228385925293, "rewards/margins": 0.7628434300422668, "rewards/rejected": -1.7273662090301514, "step": 3990 }, { "epoch": 4.19, "learning_rate": 3.308913785643255e-08, "logits/chosen": -1.7557369470596313, "logits/rejected": -1.6469757556915283, "logps/chosen": -422.50653076171875, "logps/rejected": -435.71063232421875, "loss": 0.461, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9858303070068359, "rewards/margins": 0.7525902986526489, "rewards/rejected": -1.7384207248687744, "step": 4000 }, { "epoch": 4.19, "eval_logits/chosen": -1.832720398902893, "eval_logits/rejected": -1.7189408540725708, "eval_logps/chosen": -466.1141662597656, "eval_logps/rejected": -483.5272521972656, "eval_loss": 0.5745397210121155, "eval_rewards/accuracies": 0.7222222089767456, "eval_rewards/chosen": -1.1675440073013306, "eval_rewards/margins": 0.5887669324874878, "eval_rewards/rejected": -1.7563108205795288, "eval_runtime": 225.2947, "eval_samples_per_second": 8.877, "eval_steps_per_second": 0.28, "step": 4000 }, { "epoch": 4.2, "learning_rate": 3.225947535555079e-08, "logits/chosen": -1.7829539775848389, "logits/rejected": -1.7191102504730225, "logps/chosen": -451.741943359375, "logps/rejected": -505.08514404296875, "loss": 0.4744, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -1.09004545211792, "rewards/margins": 0.8111955523490906, "rewards/rejected": -1.9012410640716553, "step": 4010 }, { "epoch": 4.21, "learning_rate": 3.143962878095829e-08, "logits/chosen": -1.7982536554336548, "logits/rejected": -1.722922921180725, "logps/chosen": -444.19189453125, "logps/rejected": -515.3712158203125, "loss": 0.4698, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -1.0294020175933838, "rewards/margins": 0.8970456123352051, "rewards/rejected": -1.9264476299285889, "step": 4020 }, { "epoch": 4.22, "learning_rate": 3.0629635092029345e-08, "logits/chosen": -1.7503137588500977, "logits/rejected": -1.6368910074234009, "logps/chosen": -433.9314880371094, "logps/rejected": -452.56524658203125, "loss": 0.4584, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -1.0288403034210205, "rewards/margins": 0.811961829662323, "rewards/rejected": -1.8408019542694092, "step": 4030 }, { "epoch": 4.23, "learning_rate": 2.9829530803961665e-08, "logits/chosen": -1.7854505777359009, "logits/rejected": -1.679652452468872, "logps/chosen": -434.6220703125, "logps/rejected": -481.2295837402344, "loss": 0.4636, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.0290977954864502, "rewards/margins": 0.8236631155014038, "rewards/rejected": -1.852760910987854, "step": 4040 }, { "epoch": 4.24, "learning_rate": 2.903935198613089e-08, "logits/chosen": -1.8103902339935303, "logits/rejected": -1.780846357345581, "logps/chosen": -447.97113037109375, "logps/rejected": -498.028076171875, "loss": 0.4595, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.0087683200836182, "rewards/margins": 0.7507632374763489, "rewards/rejected": -1.7595316171646118, "step": 4050 }, { "epoch": 4.25, "learning_rate": 2.8259134260463586e-08, "logits/chosen": -1.8016932010650635, "logits/rejected": -1.702823281288147, "logps/chosen": -474.5953674316406, "logps/rejected": -477.8955993652344, "loss": 0.4824, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.0956062078475952, "rewards/margins": 0.7087380886077881, "rewards/rejected": -1.8043444156646729, "step": 4060 }, { "epoch": 4.26, "learning_rate": 2.748891279983226e-08, "logits/chosen": -1.7880712747573853, "logits/rejected": -1.675244927406311, "logps/chosen": -443.755859375, "logps/rejected": -508.98553466796875, "loss": 0.4635, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -1.1239733695983887, "rewards/margins": 0.8481414914131165, "rewards/rejected": -1.97211492061615, "step": 4070 }, { "epoch": 4.27, "learning_rate": 2.6728722326469167e-08, "logits/chosen": -1.7646725177764893, "logits/rejected": -1.669007658958435, "logps/chosen": -416.451171875, "logps/rejected": -482.55859375, "loss": 0.4678, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -1.0863951444625854, "rewards/margins": 0.7753079533576965, "rewards/rejected": -1.8617031574249268, "step": 4080 }, { "epoch": 4.28, "learning_rate": 2.5978597110401402e-08, "logits/chosen": -1.8389514684677124, "logits/rejected": -1.725760817527771, "logps/chosen": -470.4855041503906, "logps/rejected": -503.29248046875, "loss": 0.4757, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.050333023071289, "rewards/margins": 0.7826107740402222, "rewards/rejected": -1.8329439163208008, "step": 4090 }, { "epoch": 4.29, "learning_rate": 2.5238570967905492e-08, "logits/chosen": -1.816535234451294, "logits/rejected": -1.7674373388290405, "logps/chosen": -427.8245544433594, "logps/rejected": -469.5841369628906, "loss": 0.4881, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.9843165278434753, "rewards/margins": 0.717639148235321, "rewards/rejected": -1.701956033706665, "step": 4100 }, { "epoch": 4.29, "eval_logits/chosen": -1.8260232210159302, "eval_logits/rejected": -1.712431788444519, "eval_logps/chosen": -464.182861328125, "eval_logps/rejected": -481.8480529785156, "eval_loss": 0.5762295126914978, "eval_rewards/accuracies": 0.72817462682724, "eval_rewards/chosen": -1.1482311487197876, "eval_rewards/margins": 0.5912875533103943, "eval_rewards/rejected": -1.7395187616348267, "eval_runtime": 229.9132, "eval_samples_per_second": 8.699, "eval_steps_per_second": 0.274, "step": 4100 }, { "epoch": 4.3, "learning_rate": 2.4508677259983486e-08, "logits/chosen": -1.7945934534072876, "logits/rejected": -1.721040964126587, "logps/chosen": -452.2828063964844, "logps/rejected": -499.61480712890625, "loss": 0.4607, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.0209802389144897, "rewards/margins": 0.7915663123130798, "rewards/rejected": -1.8125463724136353, "step": 4110 }, { "epoch": 4.31, "learning_rate": 2.3788948890858613e-08, "logits/chosen": -1.8254003524780273, "logits/rejected": -1.7241672277450562, "logps/chosen": -441.425048828125, "logps/rejected": -490.83245849609375, "loss": 0.4782, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.1029802560806274, "rewards/margins": 0.6961835622787476, "rewards/rejected": -1.799163579940796, "step": 4120 }, { "epoch": 4.32, "learning_rate": 2.3079418306492098e-08, "logits/chosen": -1.8279308080673218, "logits/rejected": -1.726564645767212, "logps/chosen": -449.76678466796875, "logps/rejected": -478.5965881347656, "loss": 0.4654, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.0333783626556396, "rewards/margins": 0.800345778465271, "rewards/rejected": -1.8337242603302002, "step": 4130 }, { "epoch": 4.33, "learning_rate": 2.2380117493120493e-08, "logits/chosen": -1.7413800954818726, "logits/rejected": -1.6594340801239014, "logps/chosen": -419.1304626464844, "logps/rejected": -470.057373046875, "loss": 0.4836, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.9636515378952026, "rewards/margins": 0.8068215250968933, "rewards/rejected": -1.7704731225967407, "step": 4140 }, { "epoch": 4.34, "learning_rate": 2.1691077975813488e-08, "logits/chosen": -1.8358606100082397, "logits/rejected": -1.718096137046814, "logps/chosen": -452.9070739746094, "logps/rejected": -510.75439453125, "loss": 0.4629, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.0342588424682617, "rewards/margins": 0.8376301527023315, "rewards/rejected": -1.871889352798462, "step": 4150 }, { "epoch": 4.36, "learning_rate": 2.1012330817053142e-08, "logits/chosen": -1.675252914428711, "logits/rejected": -1.6835241317749023, "logps/chosen": -434.5631408691406, "logps/rejected": -507.29278564453125, "loss": 0.4657, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -1.1027967929840088, "rewards/margins": 0.7846490740776062, "rewards/rejected": -1.8874458074569702, "step": 4160 }, { "epoch": 4.37, "learning_rate": 2.0343906615333113e-08, "logits/chosen": -1.7944949865341187, "logits/rejected": -1.6714982986450195, "logps/chosen": -463.69842529296875, "logps/rejected": -492.56488037109375, "loss": 0.4669, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.0258897542953491, "rewards/margins": 0.7885478734970093, "rewards/rejected": -1.8144375085830688, "step": 4170 }, { "epoch": 4.38, "learning_rate": 1.968583550377953e-08, "logits/chosen": -1.7210184335708618, "logits/rejected": -1.5984843969345093, "logps/chosen": -449.9814453125, "logps/rejected": -471.9698181152344, "loss": 0.4691, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.9973635673522949, "rewards/margins": 0.8492447733879089, "rewards/rejected": -1.8466084003448486, "step": 4180 }, { "epoch": 4.39, "learning_rate": 1.903814714879251e-08, "logits/chosen": -1.8879683017730713, "logits/rejected": -1.7043514251708984, "logps/chosen": -494.8345642089844, "logps/rejected": -487.78680419921875, "loss": 0.466, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -1.0199146270751953, "rewards/margins": 0.7881089448928833, "rewards/rejected": -1.808023452758789, "step": 4190 }, { "epoch": 4.4, "learning_rate": 1.840087074870883e-08, "logits/chosen": -1.8216197490692139, "logits/rejected": -1.724962592124939, "logps/chosen": -480.49090576171875, "logps/rejected": -513.2200927734375, "loss": 0.4449, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.0223886966705322, "rewards/margins": 0.8934313654899597, "rewards/rejected": -1.9158203601837158, "step": 4200 }, { "epoch": 4.4, "eval_logits/chosen": -1.8251301050186157, "eval_logits/rejected": -1.711572289466858, "eval_logps/chosen": -466.1421203613281, "eval_logps/rejected": -484.05059814453125, "eval_loss": 0.5764839053153992, "eval_rewards/accuracies": 0.7202380895614624, "eval_rewards/chosen": -1.1678234338760376, "eval_rewards/margins": 0.5937210321426392, "eval_rewards/rejected": -1.7615445852279663, "eval_runtime": 219.2125, "eval_samples_per_second": 9.124, "eval_steps_per_second": 0.287, "step": 4200 }, { "epoch": 4.41, "learning_rate": 1.7774035032485367e-08, "logits/chosen": -1.7790334224700928, "logits/rejected": -1.6728601455688477, "logps/chosen": -456.02197265625, "logps/rejected": -504.24505615234375, "loss": 0.4849, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.129804015159607, "rewards/margins": 0.7883261442184448, "rewards/rejected": -1.9181305170059204, "step": 4210 }, { "epoch": 4.42, "learning_rate": 1.7157668258404312e-08, "logits/chosen": -1.6565701961517334, "logits/rejected": -1.6299617290496826, "logps/chosen": -389.723876953125, "logps/rejected": -458.76214599609375, "loss": 0.4669, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.0441362857818604, "rewards/margins": 0.6970081329345703, "rewards/rejected": -1.7411444187164307, "step": 4220 }, { "epoch": 4.43, "learning_rate": 1.6551798212799227e-08, "logits/chosen": -1.7372820377349854, "logits/rejected": -1.674203634262085, "logps/chosen": -443.078125, "logps/rejected": -481.3499450683594, "loss": 0.4812, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.0964055061340332, "rewards/margins": 0.6954679489135742, "rewards/rejected": -1.791873574256897, "step": 4230 }, { "epoch": 4.44, "learning_rate": 1.595645220880204e-08, "logits/chosen": -1.7401357889175415, "logits/rejected": -1.607173204421997, "logps/chosen": -464.32763671875, "logps/rejected": -509.621337890625, "loss": 0.483, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.0939157009124756, "rewards/margins": 0.7595055103302002, "rewards/rejected": -1.8534212112426758, "step": 4240 }, { "epoch": 4.45, "learning_rate": 1.537165708511226e-08, "logits/chosen": -1.8759247064590454, "logits/rejected": -1.725313425064087, "logps/chosen": -477.0743713378906, "logps/rejected": -488.4725646972656, "loss": 0.4588, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.1122633218765259, "rewards/margins": 0.7690132856369019, "rewards/rejected": -1.8812764883041382, "step": 4250 }, { "epoch": 4.46, "learning_rate": 1.479743920478671e-08, "logits/chosen": -1.8185522556304932, "logits/rejected": -1.745117425918579, "logps/chosen": -472.5321350097656, "logps/rejected": -490.56317138671875, "loss": 0.4814, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.1119530200958252, "rewards/margins": 0.6407719254493713, "rewards/rejected": -1.7527250051498413, "step": 4260 }, { "epoch": 4.47, "learning_rate": 1.4233824454051191e-08, "logits/chosen": -1.7532942295074463, "logits/rejected": -1.644690752029419, "logps/chosen": -444.12176513671875, "logps/rejected": -484.6136779785156, "loss": 0.469, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0666099786758423, "rewards/margins": 0.803938090801239, "rewards/rejected": -1.8705480098724365, "step": 4270 }, { "epoch": 4.48, "learning_rate": 1.3680838241133475e-08, "logits/chosen": -1.8085733652114868, "logits/rejected": -1.7114070653915405, "logps/chosen": -454.65179443359375, "logps/rejected": -483.69598388671875, "loss": 0.463, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.9596077799797058, "rewards/margins": 0.8614352941513062, "rewards/rejected": -1.8210432529449463, "step": 4280 }, { "epoch": 4.49, "learning_rate": 1.3138505495117913e-08, "logits/chosen": -1.7711594104766846, "logits/rejected": -1.7229337692260742, "logps/chosen": -445.0728454589844, "logps/rejected": -505.59564208984375, "loss": 0.4603, "rewards/accuracies": 0.8125, "rewards/chosen": -1.026932954788208, "rewards/margins": 0.8151108026504517, "rewards/rejected": -1.8420432806015015, "step": 4290 }, { "epoch": 4.5, "learning_rate": 1.2606850664821617e-08, "logits/chosen": -1.8515970706939697, "logits/rejected": -1.6959202289581299, "logps/chosen": -457.86737060546875, "logps/rejected": -480.14093017578125, "loss": 0.4692, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.0556819438934326, "rewards/margins": 0.7872077226638794, "rewards/rejected": -1.8428895473480225, "step": 4300 }, { "epoch": 4.5, "eval_logits/chosen": -1.8279350996017456, "eval_logits/rejected": -1.7143094539642334, "eval_logps/chosen": -466.46240234375, "eval_logps/rejected": -484.0967712402344, "eval_loss": 0.5759356021881104, "eval_rewards/accuracies": 0.7242063283920288, "eval_rewards/chosen": -1.171026349067688, "eval_rewards/margins": 0.5909795761108398, "eval_rewards/rejected": -1.7620059251785278, "eval_runtime": 226.1955, "eval_samples_per_second": 8.842, "eval_steps_per_second": 0.279, "step": 4300 }, { "epoch": 4.51, "learning_rate": 1.208589771769225e-08, "logits/chosen": -1.7081100940704346, "logits/rejected": -1.6469475030899048, "logps/chosen": -421.621337890625, "logps/rejected": -453.98956298828125, "loss": 0.4712, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.0625663995742798, "rewards/margins": 0.669289767742157, "rewards/rejected": -1.731856107711792, "step": 4310 }, { "epoch": 4.52, "learning_rate": 1.1575670138727456e-08, "logits/chosen": -1.8913682699203491, "logits/rejected": -1.6768405437469482, "logps/chosen": -473.85992431640625, "logps/rejected": -507.3983459472656, "loss": 0.4676, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -1.0402823686599731, "rewards/margins": 0.8671220541000366, "rewards/rejected": -1.9074045419692993, "step": 4320 }, { "epoch": 4.53, "learning_rate": 1.1076190929416418e-08, "logits/chosen": -1.871779441833496, "logits/rejected": -1.73202383518219, "logps/chosen": -485.59912109375, "logps/rejected": -490.35723876953125, "loss": 0.4702, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.149161458015442, "rewards/margins": 0.6439096927642822, "rewards/rejected": -1.7930711507797241, "step": 4330 }, { "epoch": 4.54, "learning_rate": 1.0587482606702697e-08, "logits/chosen": -1.8452228307724, "logits/rejected": -1.6967185735702515, "logps/chosen": -459.0291442871094, "logps/rejected": -490.520263671875, "loss": 0.4512, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0778141021728516, "rewards/margins": 0.7190856337547302, "rewards/rejected": -1.7969001531600952, "step": 4340 }, { "epoch": 4.55, "learning_rate": 1.0109567201969176e-08, "logits/chosen": -1.823883056640625, "logits/rejected": -1.7028872966766357, "logps/chosen": -429.84124755859375, "logps/rejected": -477.64215087890625, "loss": 0.4699, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.9281118512153625, "rewards/margins": 0.8856562376022339, "rewards/rejected": -1.8137681484222412, "step": 4350 }, { "epoch": 4.56, "learning_rate": 9.642466260044918e-09, "logits/chosen": -1.762036919593811, "logits/rejected": -1.6680705547332764, "logps/chosen": -427.251220703125, "logps/rejected": -469.9976501464844, "loss": 0.479, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -1.0374650955200195, "rewards/margins": 0.7968862056732178, "rewards/rejected": -1.8343513011932373, "step": 4360 }, { "epoch": 4.58, "learning_rate": 9.186200838233904e-09, "logits/chosen": -1.7466312646865845, "logits/rejected": -1.6498746871948242, "logps/chosen": -464.2078552246094, "logps/rejected": -464.82037353515625, "loss": 0.4989, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.1440064907073975, "rewards/margins": 0.6234654784202576, "rewards/rejected": -1.7674716711044312, "step": 4370 }, { "epoch": 4.59, "learning_rate": 8.740791505365747e-09, "logits/chosen": -1.8438808917999268, "logits/rejected": -1.7786586284637451, "logps/chosen": -413.7540588378906, "logps/rejected": -479.59954833984375, "loss": 0.4642, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.982395350933075, "rewards/margins": 0.7656720876693726, "rewards/rejected": -1.7480674982070923, "step": 4380 }, { "epoch": 4.6, "learning_rate": 8.3062583408684e-09, "logits/chosen": -1.7615505456924438, "logits/rejected": -1.6486164331436157, "logps/chosen": -465.7588806152344, "logps/rejected": -513.684814453125, "loss": 0.4674, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0538051128387451, "rewards/margins": 0.9259963035583496, "rewards/rejected": -1.9798015356063843, "step": 4390 }, { "epoch": 4.61, "learning_rate": 7.88262093386302e-09, "logits/chosen": -1.7515672445297241, "logits/rejected": -1.6775462627410889, "logps/chosen": -452.8060607910156, "logps/rejected": -492.5433044433594, "loss": 0.4654, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.0378320217132568, "rewards/margins": 0.7652202844619751, "rewards/rejected": -1.803052306175232, "step": 4400 }, { "epoch": 4.61, "eval_logits/chosen": -1.8290122747421265, "eval_logits/rejected": -1.7153981924057007, "eval_logps/chosen": -466.3009338378906, "eval_logps/rejected": -484.222412109375, "eval_loss": 0.5759946703910828, "eval_rewards/accuracies": 0.726190447807312, "eval_rewards/chosen": -1.1694118976593018, "eval_rewards/margins": 0.5938506722450256, "eval_rewards/rejected": -1.763262391090393, "eval_runtime": 229.4785, "eval_samples_per_second": 8.715, "eval_steps_per_second": 0.275, "step": 4400 }, { "epoch": 4.62, "learning_rate": 7.469898382280765e-09, "logits/chosen": -1.8184077739715576, "logits/rejected": -1.722876787185669, "logps/chosen": -464.96612548828125, "logps/rejected": -512.9050903320312, "loss": 0.5075, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.1251068115234375, "rewards/margins": 0.7477315068244934, "rewards/rejected": -1.872838020324707, "step": 4410 }, { "epoch": 4.63, "learning_rate": 7.068109292002022e-09, "logits/chosen": -1.8087494373321533, "logits/rejected": -1.651476502418518, "logps/chosen": -461.4347229003906, "logps/rejected": -481.1415100097656, "loss": 0.4707, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9792950749397278, "rewards/margins": 0.8313227891921997, "rewards/rejected": -1.8106176853179932, "step": 4420 }, { "epoch": 4.64, "learning_rate": 6.677271776017457e-09, "logits/chosen": -1.7190685272216797, "logits/rejected": -1.5987236499786377, "logps/chosen": -453.14617919921875, "logps/rejected": -511.3948669433594, "loss": 0.4754, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.0528547763824463, "rewards/margins": 0.8089377284049988, "rewards/rejected": -1.8617923259735107, "step": 4430 }, { "epoch": 4.65, "learning_rate": 6.297403453611488e-09, "logits/chosen": -1.753126859664917, "logits/rejected": -1.6576951742172241, "logps/chosen": -443.3055725097656, "logps/rejected": -492.17633056640625, "loss": 0.4545, "rewards/accuracies": 0.78125, "rewards/chosen": -1.0997533798217773, "rewards/margins": 0.753233790397644, "rewards/rejected": -1.852987289428711, "step": 4440 }, { "epoch": 4.66, "learning_rate": 5.928521449568236e-09, "logits/chosen": -1.8097671270370483, "logits/rejected": -1.6349399089813232, "logps/chosen": -489.1026916503906, "logps/rejected": -497.45684814453125, "loss": 0.4709, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -1.0746281147003174, "rewards/margins": 0.84410160779953, "rewards/rejected": -1.9187300205230713, "step": 4450 }, { "epoch": 4.67, "learning_rate": 5.570642393399105e-09, "logits/chosen": -1.8092399835586548, "logits/rejected": -1.6813846826553345, "logps/chosen": -449.77532958984375, "logps/rejected": -478.45013427734375, "loss": 0.4655, "rewards/accuracies": 0.8125, "rewards/chosen": -1.064793586730957, "rewards/margins": 0.7874218225479126, "rewards/rejected": -1.8522160053253174, "step": 4460 }, { "epoch": 4.68, "learning_rate": 5.223782418593503e-09, "logits/chosen": -1.7820489406585693, "logits/rejected": -1.7109342813491821, "logps/chosen": -465.9383850097656, "logps/rejected": -522.559814453125, "loss": 0.4838, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.1747386455535889, "rewards/margins": 0.7057716250419617, "rewards/rejected": -1.8805103302001953, "step": 4470 }, { "epoch": 4.69, "learning_rate": 4.887957161891304e-09, "logits/chosen": -1.7804561853408813, "logits/rejected": -1.6633250713348389, "logps/chosen": -428.78375244140625, "logps/rejected": -464.534423828125, "loss": 0.4671, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.0096886157989502, "rewards/margins": 0.8078888654708862, "rewards/rejected": -1.817577600479126, "step": 4480 }, { "epoch": 4.7, "learning_rate": 4.5631817625780274e-09, "logits/chosen": -1.8089252710342407, "logits/rejected": -1.712436318397522, "logps/chosen": -463.1101989746094, "logps/rejected": -498.7040100097656, "loss": 0.4795, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.0910892486572266, "rewards/margins": 0.6699890494346619, "rewards/rejected": -1.7610784769058228, "step": 4490 }, { "epoch": 4.71, "learning_rate": 4.249470861802218e-09, "logits/chosen": -1.8122116327285767, "logits/rejected": -1.7331546545028687, "logps/chosen": -415.1197814941406, "logps/rejected": -455.2989196777344, "loss": 0.4608, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.057392954826355, "rewards/margins": 0.7255457639694214, "rewards/rejected": -1.7829385995864868, "step": 4500 }, { "epoch": 4.71, "eval_logits/chosen": -1.8304409980773926, "eval_logits/rejected": -1.7171387672424316, "eval_logps/chosen": -467.0130920410156, "eval_logps/rejected": -484.81231689453125, "eval_loss": 0.5753689408302307, "eval_rewards/accuracies": 0.7202380895614624, "eval_rewards/chosen": -1.176533579826355, "eval_rewards/margins": 0.592628002166748, "eval_rewards/rejected": -1.769161581993103, "eval_runtime": 230.1385, "eval_samples_per_second": 8.69, "eval_steps_per_second": 0.274, "step": 4500 }, { "epoch": 4.72, "learning_rate": 3.946838601915581e-09, "logits/chosen": -1.7701961994171143, "logits/rejected": -1.678413987159729, "logps/chosen": -433.9873962402344, "logps/rejected": -465.7084045410156, "loss": 0.4713, "rewards/accuracies": 0.78125, "rewards/chosen": -1.0330111980438232, "rewards/margins": 0.6660115718841553, "rewards/rejected": -1.699022889137268, "step": 4510 }, { "epoch": 4.73, "learning_rate": 3.6552986258354123e-09, "logits/chosen": -1.7901086807250977, "logits/rejected": -1.6710205078125, "logps/chosen": -508.4490661621094, "logps/rejected": -499.4988708496094, "loss": 0.4517, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.1582683324813843, "rewards/margins": 0.6341474652290344, "rewards/rejected": -1.792415976524353, "step": 4520 }, { "epoch": 4.74, "learning_rate": 3.3748640764293955e-09, "logits/chosen": -1.7829793691635132, "logits/rejected": -1.653700590133667, "logps/chosen": -477.4127502441406, "logps/rejected": -496.84991455078125, "loss": 0.4831, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.1034595966339111, "rewards/margins": 0.7908321619033813, "rewards/rejected": -1.894291639328003, "step": 4530 }, { "epoch": 4.75, "learning_rate": 3.1055475959232693e-09, "logits/chosen": -1.761479139328003, "logits/rejected": -1.678342580795288, "logps/chosen": -474.9033203125, "logps/rejected": -467.4913635253906, "loss": 0.4562, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -1.0001628398895264, "rewards/margins": 0.7919613718986511, "rewards/rejected": -1.7921243906021118, "step": 4540 }, { "epoch": 4.76, "learning_rate": 2.8473613253308937e-09, "logits/chosen": -1.7882721424102783, "logits/rejected": -1.7147448062896729, "logps/chosen": -466.8858947753906, "logps/rejected": -501.8744201660156, "loss": 0.4796, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.9468268156051636, "rewards/margins": 0.6786229014396667, "rewards/rejected": -1.625449776649475, "step": 4550 }, { "epoch": 4.77, "learning_rate": 2.6003169039068574e-09, "logits/chosen": -1.8359006643295288, "logits/rejected": -1.7158222198486328, "logps/chosen": -440.7718200683594, "logps/rejected": -496.41583251953125, "loss": 0.4748, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.087088942527771, "rewards/margins": 0.7704466581344604, "rewards/rejected": -1.8575356006622314, "step": 4560 }, { "epoch": 4.78, "learning_rate": 2.3644254686217837e-09, "logits/chosen": -1.7801955938339233, "logits/rejected": -1.6484178304672241, "logps/chosen": -475.614013671875, "logps/rejected": -476.01654052734375, "loss": 0.4831, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.1018412113189697, "rewards/margins": 0.7570894360542297, "rewards/rejected": -1.8589305877685547, "step": 4570 }, { "epoch": 4.79, "learning_rate": 2.139697653660316e-09, "logits/chosen": -1.8015964031219482, "logits/rejected": -1.6739526987075806, "logps/chosen": -466.79052734375, "logps/rejected": -482.6788024902344, "loss": 0.4746, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.1629666090011597, "rewards/margins": 0.607349157333374, "rewards/rejected": -1.7703158855438232, "step": 4580 }, { "epoch": 4.81, "learning_rate": 1.92614358994167e-09, "logits/chosen": -1.8202970027923584, "logits/rejected": -1.7315584421157837, "logps/chosen": -479.6277770996094, "logps/rejected": -499.9185485839844, "loss": 0.4615, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.1074717044830322, "rewards/margins": 0.7402085065841675, "rewards/rejected": -1.8476800918579102, "step": 4590 }, { "epoch": 4.82, "learning_rate": 1.7237729046629679e-09, "logits/chosen": -1.728281021118164, "logits/rejected": -1.6742477416992188, "logps/chosen": -438.21844482421875, "logps/rejected": -507.11712646484375, "loss": 0.4661, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.1053134202957153, "rewards/margins": 0.7916030883789062, "rewards/rejected": -1.8969166278839111, "step": 4600 }, { "epoch": 4.82, "eval_logits/chosen": -1.825499176979065, "eval_logits/rejected": -1.711985468864441, "eval_logps/chosen": -467.5480651855469, "eval_logps/rejected": -485.3936767578125, "eval_loss": 0.5754343271255493, "eval_rewards/accuracies": 0.72817462682724, "eval_rewards/chosen": -1.181883454322815, "eval_rewards/margins": 0.5930914282798767, "eval_rewards/rejected": -1.7749747037887573, "eval_runtime": 232.3791, "eval_samples_per_second": 8.607, "eval_steps_per_second": 0.271, "step": 4600 }, { "epoch": 4.83, "learning_rate": 1.5325947208651713e-09, "logits/chosen": -1.7464689016342163, "logits/rejected": -1.6714897155761719, "logps/chosen": -427.6334533691406, "logps/rejected": -462.86590576171875, "loss": 0.445, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.0283820629119873, "rewards/margins": 0.809057891368866, "rewards/rejected": -1.8374401330947876, "step": 4610 }, { "epoch": 4.84, "learning_rate": 1.352617657021854e-09, "logits/chosen": -1.7710460424423218, "logits/rejected": -1.703453779220581, "logps/chosen": -393.85345458984375, "logps/rejected": -438.5416564941406, "loss": 0.4722, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0014649629592896, "rewards/margins": 0.7734811902046204, "rewards/rejected": -1.7749459743499756, "step": 4620 }, { "epoch": 4.85, "learning_rate": 1.1838498266507069e-09, "logits/chosen": -1.8539412021636963, "logits/rejected": -1.7409346103668213, "logps/chosen": -438.21563720703125, "logps/rejected": -478.8936462402344, "loss": 0.4596, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0243479013442993, "rewards/margins": 0.816145122051239, "rewards/rejected": -1.8404929637908936, "step": 4630 }, { "epoch": 4.86, "learning_rate": 1.0262988379476922e-09, "logits/chosen": -1.7646992206573486, "logits/rejected": -1.6506078243255615, "logps/chosen": -478.5440979003906, "logps/rejected": -482.2001953125, "loss": 0.4836, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.151123285293579, "rewards/margins": 0.6975986957550049, "rewards/rejected": -1.8487218618392944, "step": 4640 }, { "epoch": 4.87, "learning_rate": 8.79971793444123e-10, "logits/chosen": -1.7563340663909912, "logits/rejected": -1.6405454874038696, "logps/chosen": -439.6841735839844, "logps/rejected": -495.43829345703125, "loss": 0.4669, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -1.0974491834640503, "rewards/margins": 0.8005310893058777, "rewards/rejected": -1.8979803323745728, "step": 4650 }, { "epoch": 4.88, "learning_rate": 7.448752896864197e-10, "logits/chosen": -1.8486369848251343, "logits/rejected": -1.6208820343017578, "logps/chosen": -469.00390625, "logps/rejected": -463.940673828125, "loss": 0.502, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.085375189781189, "rewards/margins": 0.7727323770523071, "rewards/rejected": -1.858107328414917, "step": 4660 }, { "epoch": 4.89, "learning_rate": 6.210154169388193e-10, "logits/chosen": -1.7519057989120483, "logits/rejected": -1.6661628484725952, "logps/chosen": -452.79205322265625, "logps/rejected": -476.6766662597656, "loss": 0.4611, "rewards/accuracies": 0.78125, "rewards/chosen": -1.1015011072158813, "rewards/margins": 0.7438825964927673, "rewards/rejected": -1.845383644104004, "step": 4670 }, { "epoch": 4.9, "learning_rate": 5.083977589086796e-10, "logits/chosen": -1.882615327835083, "logits/rejected": -1.7311588525772095, "logps/chosen": -480.04827880859375, "logps/rejected": -506.5596618652344, "loss": 0.4507, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.9937393069267273, "rewards/margins": 0.8901308178901672, "rewards/rejected": -1.8838701248168945, "step": 4680 }, { "epoch": 4.91, "learning_rate": 4.070273924949574e-10, "logits/chosen": -1.789244294166565, "logits/rejected": -1.7519454956054688, "logps/chosen": -469.8148498535156, "logps/rejected": -508.5157165527344, "loss": 0.4728, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -1.0590931177139282, "rewards/margins": 0.7643530964851379, "rewards/rejected": -1.823446273803711, "step": 4690 }, { "epoch": 4.92, "learning_rate": 3.169088875591419e-10, "logits/chosen": -1.7920825481414795, "logits/rejected": -1.705712080001831, "logps/chosen": -448.9642639160156, "logps/rejected": -477.53350830078125, "loss": 0.4859, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.0943710803985596, "rewards/margins": 0.7004526257514954, "rewards/rejected": -1.7948236465454102, "step": 4700 }, { "epoch": 4.92, "eval_logits/chosen": -1.8237359523773193, "eval_logits/rejected": -1.710079550743103, "eval_logps/chosen": -467.6951904296875, "eval_logps/rejected": -485.5031433105469, "eval_loss": 0.5756003856658936, "eval_rewards/accuracies": 0.7202380895614624, "eval_rewards/chosen": -1.1833546161651611, "eval_rewards/margins": 0.5927155017852783, "eval_rewards/rejected": -1.7760698795318604, "eval_runtime": 225.1994, "eval_samples_per_second": 8.881, "eval_steps_per_second": 0.28, "step": 4700 }, { "epoch": 4.93, "learning_rate": 2.380463067193361e-10, "logits/chosen": -1.744341492652893, "logits/rejected": -1.6670739650726318, "logps/chosen": -422.7705993652344, "logps/rejected": -458.7456970214844, "loss": 0.4661, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.988696277141571, "rewards/margins": 0.7636533975601196, "rewards/rejected": -1.7523494958877563, "step": 4710 }, { "epoch": 4.94, "learning_rate": 1.7044320516718113e-10, "logits/chosen": -1.7861382961273193, "logits/rejected": -1.728690505027771, "logps/chosen": -434.71026611328125, "logps/rejected": -507.8138732910156, "loss": 0.4709, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0964405536651611, "rewards/margins": 0.8338233232498169, "rewards/rejected": -1.930263876914978, "step": 4720 }, { "epoch": 4.95, "learning_rate": 1.1410263050737335e-10, "logits/chosen": -1.765300989151001, "logits/rejected": -1.6384683847427368, "logps/chosen": -451.39990234375, "logps/rejected": -479.0953674316406, "loss": 0.4637, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.0694044828414917, "rewards/margins": 0.893588662147522, "rewards/rejected": -1.9629930257797241, "step": 4730 }, { "epoch": 4.96, "learning_rate": 6.902712262055188e-11, "logits/chosen": -1.751755714416504, "logits/rejected": -1.6271288394927979, "logps/chosen": -445.4202575683594, "logps/rejected": -468.89678955078125, "loss": 0.4951, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.0948576927185059, "rewards/margins": 0.7223270535469055, "rewards/rejected": -1.8171848058700562, "step": 4740 }, { "epoch": 4.97, "learning_rate": 3.52187135485571e-11, "logits/chosen": -1.7895715236663818, "logits/rejected": -1.596680998802185, "logps/chosen": -461.5955505371094, "logps/rejected": -480.7381896972656, "loss": 0.4858, "rewards/accuracies": 0.75, "rewards/chosen": -1.0684163570404053, "rewards/margins": 0.6338636875152588, "rewards/rejected": -1.702280044555664, "step": 4750 }, { "epoch": 4.98, "learning_rate": 1.2678927402948181e-11, "logits/chosen": -1.756028175354004, "logits/rejected": -1.7099990844726562, "logps/chosen": -447.0310974121094, "logps/rejected": -500.3916931152344, "loss": 0.4766, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.0929630994796753, "rewards/margins": 0.802161693572998, "rewards/rejected": -1.8951247930526733, "step": 4760 }, { "epoch": 4.99, "learning_rate": 1.408780296280332e-12, "logits/chosen": -1.8381448984146118, "logits/rejected": -1.7302604913711548, "logps/chosen": -483.72064208984375, "logps/rejected": -538.6234130859375, "loss": 0.4449, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0196287631988525, "rewards/margins": 0.8810880780220032, "rewards/rejected": -1.9007165431976318, "step": 4770 }, { "epoch": 5.0, "step": 4775, "total_flos": 0.0, "train_loss": 0.15748632995245967, "train_runtime": 23969.9997, "train_samples_per_second": 12.752, "train_steps_per_second": 0.199 } ], "logging_steps": 10, "max_steps": 4775, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }