{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998854993048172, "eval_steps": 10, "global_step": 7641, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.535947712418301e-09, "logits/chosen": -3.0474565029144287, "logits/rejected": -3.0019595623016357, "logps/chosen": -250.30178833007812, "logps/rejected": -231.682373046875, "loss": 0.6933, "rewards/accuracies": 0.0, "rewards/chosen": -0.00028943538200110197, "rewards/margins": -0.0002489328326191753, "rewards/rejected": -4.050254574394785e-05, "step": 1 }, { "epoch": 0.0, "learning_rate": 6.535947712418302e-08, "logits/chosen": -2.9978737831115723, "logits/rejected": -3.0040385723114014, "logps/chosen": -347.8559875488281, "logps/rejected": -305.50567626953125, "loss": 0.693, "rewards/accuracies": 0.4305555522441864, "rewards/chosen": -0.0002880638639908284, "rewards/margins": 0.0003410349600017071, "rewards/rejected": -0.0006290989113040268, "step": 10 }, { "epoch": 0.0, "eval_logits/chosen": -2.7424161434173584, "eval_logits/rejected": -2.7351112365722656, "eval_logps/chosen": -332.7445373535156, "eval_logps/rejected": -301.1111755371094, "eval_loss": 0.6931213140487671, "eval_rewards/accuracies": 0.4964999854564667, "eval_rewards/chosen": 4.8589161451673135e-05, "eval_rewards/margins": 6.048592695151456e-05, "eval_rewards/rejected": -1.189680006064009e-05, "eval_runtime": 196.7026, "eval_samples_per_second": 10.168, "eval_steps_per_second": 5.084, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.3071895424836603e-07, "logits/chosen": -3.037752628326416, "logits/rejected": -3.0157015323638916, "logps/chosen": -326.78704833984375, "logps/rejected": -328.48126220703125, "loss": 0.6932, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.000477545807370916, "rewards/margins": -0.00011621458543231711, "rewards/rejected": 0.0005937603418715298, "step": 20 }, { "epoch": 0.0, "eval_logits/chosen": -2.7427480220794678, "eval_logits/rejected": -2.7354896068573, "eval_logps/chosen": -332.742431640625, "eval_logps/rejected": -301.1186218261719, "eval_loss": 0.6930737495422363, "eval_rewards/accuracies": 0.49799999594688416, "eval_rewards/chosen": 6.974298594286665e-05, "eval_rewards/margins": 0.00015557045117020607, "eval_rewards/rejected": -8.582745067542419e-05, "eval_runtime": 196.8696, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.08, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.9607843137254904e-07, "logits/chosen": -2.990180492401123, "logits/rejected": -2.9676098823547363, "logps/chosen": -294.37188720703125, "logps/rejected": -254.3704833984375, "loss": 0.6931, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.00014316548185888678, "rewards/margins": 3.067384386667982e-05, "rewards/rejected": 0.00011249161616433412, "step": 30 }, { "epoch": 0.0, "eval_logits/chosen": -2.7423112392425537, "eval_logits/rejected": -2.735067367553711, "eval_logps/chosen": -332.74560546875, "eval_logps/rejected": -301.10736083984375, "eval_loss": 0.6931455731391907, "eval_rewards/accuracies": 0.49300000071525574, "eval_rewards/chosen": 3.810242560575716e-05, "eval_rewards/margins": 1.1804982023022603e-05, "eval_rewards/rejected": 2.629743903526105e-05, "eval_runtime": 196.8402, "eval_samples_per_second": 10.161, "eval_steps_per_second": 5.08, "step": 30 }, { "epoch": 0.01, "learning_rate": 2.6143790849673207e-07, "logits/chosen": -3.080005168914795, "logits/rejected": -2.9933598041534424, "logps/chosen": -330.540771484375, "logps/rejected": -295.5124816894531, "loss": 0.6931, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.00030000676633790135, "rewards/margins": 0.00012870438513346016, "rewards/rejected": 0.00017130242486018687, "step": 40 }, { "epoch": 0.01, "eval_logits/chosen": -2.742424964904785, "eval_logits/rejected": -2.7351646423339844, "eval_logps/chosen": -332.74249267578125, "eval_logps/rejected": -301.1159973144531, "eval_loss": 0.6930870413780212, "eval_rewards/accuracies": 0.5095000267028809, "eval_rewards/chosen": 6.893646059324965e-05, "eval_rewards/margins": 0.0001285538892261684, "eval_rewards/rejected": -5.961741408100352e-05, "eval_runtime": 196.9756, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 40 }, { "epoch": 0.01, "learning_rate": 3.267973856209151e-07, "logits/chosen": -3.063934564590454, "logits/rejected": -3.077270984649658, "logps/chosen": -284.46533203125, "logps/rejected": -276.5115661621094, "loss": 0.6935, "rewards/accuracies": 0.4375, "rewards/chosen": -0.00018404402362648398, "rewards/margins": -0.0007704938179813325, "rewards/rejected": 0.000586449692491442, "step": 50 }, { "epoch": 0.01, "eval_logits/chosen": -2.7421915531158447, "eval_logits/rejected": -2.734881639480591, "eval_logps/chosen": -332.7299499511719, "eval_logps/rejected": -301.101318359375, "eval_loss": 0.6930976510047913, "eval_rewards/accuracies": 0.49799999594688416, "eval_rewards/chosen": 0.000194655847735703, "eval_rewards/margins": 0.00010741400183178484, "eval_rewards/rejected": 8.724184590391815e-05, "eval_runtime": 196.923, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 50 }, { "epoch": 0.01, "learning_rate": 3.921568627450981e-07, "logits/chosen": -3.013704776763916, "logits/rejected": -3.0369315147399902, "logps/chosen": -328.0228271484375, "logps/rejected": -295.39581298828125, "loss": 0.6932, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.001045036711730063, "rewards/margins": -0.00013430326362140477, "rewards/rejected": -0.0009107333607971668, "step": 60 }, { "epoch": 0.01, "eval_logits/chosen": -2.7421178817749023, "eval_logits/rejected": -2.7349209785461426, "eval_logps/chosen": -332.7392883300781, "eval_logps/rejected": -301.1269836425781, "eval_loss": 0.6930162906646729, "eval_rewards/accuracies": 0.5099999904632568, "eval_rewards/chosen": 0.00010118891077581793, "eval_rewards/margins": 0.0002707123931031674, "eval_rewards/rejected": -0.0001695234968792647, "eval_runtime": 196.9045, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 60 }, { "epoch": 0.01, "learning_rate": 4.5751633986928105e-07, "logits/chosen": -3.069620132446289, "logits/rejected": -3.0621676445007324, "logps/chosen": -301.7582702636719, "logps/rejected": -252.75460815429688, "loss": 0.6933, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.0005456652725115418, "rewards/margins": -0.0003828687476925552, "rewards/rejected": -0.00016279640840366483, "step": 70 }, { "epoch": 0.01, "eval_logits/chosen": -2.7423887252807617, "eval_logits/rejected": -2.7351789474487305, "eval_logps/chosen": -332.7414855957031, "eval_logps/rejected": -301.119384765625, "eval_loss": 0.6930652856826782, "eval_rewards/accuracies": 0.5210000276565552, "eval_rewards/chosen": 7.922769873403013e-05, "eval_rewards/margins": 0.00017279147868975997, "eval_rewards/rejected": -9.356377995572984e-05, "eval_runtime": 196.7866, "eval_samples_per_second": 10.163, "eval_steps_per_second": 5.082, "step": 70 }, { "epoch": 0.01, "learning_rate": 5.228758169934641e-07, "logits/chosen": -3.0048069953918457, "logits/rejected": -3.002398729324341, "logps/chosen": -354.9811096191406, "logps/rejected": -344.3815002441406, "loss": 0.693, "rewards/accuracies": 0.512499988079071, "rewards/chosen": 0.00019456178415566683, "rewards/margins": 0.00027323514223098755, "rewards/rejected": -7.867337990319356e-05, "step": 80 }, { "epoch": 0.01, "eval_logits/chosen": -2.7424306869506836, "eval_logits/rejected": -2.7352046966552734, "eval_logps/chosen": -332.7282409667969, "eval_logps/rejected": -301.10968017578125, "eval_loss": 0.6930477023124695, "eval_rewards/accuracies": 0.5009999871253967, "eval_rewards/chosen": 0.00021137729345355183, "eval_rewards/margins": 0.0002077910612570122, "eval_rewards/rejected": 3.5862587992596673e-06, "eval_runtime": 196.8147, "eval_samples_per_second": 10.162, "eval_steps_per_second": 5.081, "step": 80 }, { "epoch": 0.01, "learning_rate": 5.882352941176471e-07, "logits/chosen": -3.1034655570983887, "logits/rejected": -3.0644783973693848, "logps/chosen": -319.18951416015625, "logps/rejected": -283.13232421875, "loss": 0.6933, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0005190152442082763, "rewards/margins": -0.00037205187254585326, "rewards/rejected": 0.0008910670876502991, "step": 90 }, { "epoch": 0.01, "eval_logits/chosen": -2.7423925399780273, "eval_logits/rejected": -2.735030174255371, "eval_logps/chosen": -332.74163818359375, "eval_logps/rejected": -301.1065979003906, "eval_loss": 0.6931295394897461, "eval_rewards/accuracies": 0.4975000023841858, "eval_rewards/chosen": 7.795435521984473e-05, "eval_rewards/margins": 4.392163464217447e-05, "eval_rewards/rejected": 3.403272057767026e-05, "eval_runtime": 196.7725, "eval_samples_per_second": 10.164, "eval_steps_per_second": 5.082, "step": 90 }, { "epoch": 0.01, "learning_rate": 6.535947712418302e-07, "logits/chosen": -3.0161807537078857, "logits/rejected": -3.0300960540771484, "logps/chosen": -290.94915771484375, "logps/rejected": -294.79486083984375, "loss": 0.6935, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.00010955418838420883, "rewards/margins": -0.0007335743284784257, "rewards/rejected": 0.0006240200018510222, "step": 100 }, { "epoch": 0.01, "eval_logits/chosen": -2.7424075603485107, "eval_logits/rejected": -2.735180377960205, "eval_logps/chosen": -332.7455749511719, "eval_logps/rejected": -301.0979309082031, "eval_loss": 0.6931926608085632, "eval_rewards/accuracies": 0.49399998784065247, "eval_rewards/chosen": 3.826828833553009e-05, "eval_rewards/margins": -8.247687219409272e-05, "eval_rewards/rejected": 0.00012074514233972877, "eval_runtime": 196.9629, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 100 }, { "epoch": 0.01, "learning_rate": 7.189542483660131e-07, "logits/chosen": -3.028716564178467, "logits/rejected": -3.024019479751587, "logps/chosen": -340.0860595703125, "logps/rejected": -301.8324890136719, "loss": 0.6925, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0008878801017999649, "rewards/margins": 0.001357215573079884, "rewards/rejected": -0.00046933552948758006, "step": 110 }, { "epoch": 0.01, "eval_logits/chosen": -2.7424967288970947, "eval_logits/rejected": -2.7352287769317627, "eval_logps/chosen": -332.75115966796875, "eval_logps/rejected": -301.1202697753906, "eval_loss": 0.693109393119812, "eval_rewards/accuracies": 0.4894999861717224, "eval_rewards/chosen": -1.78045538632432e-05, "eval_rewards/margins": 8.483259443892166e-05, "eval_rewards/rejected": -0.00010263712465530261, "eval_runtime": 196.8794, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.079, "step": 110 }, { "epoch": 0.02, "learning_rate": 7.843137254901962e-07, "logits/chosen": -3.083116054534912, "logits/rejected": -3.059950590133667, "logps/chosen": -355.63006591796875, "logps/rejected": -278.5542907714844, "loss": 0.6927, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0005385326221585274, "rewards/margins": 0.0008816570043563843, "rewards/rejected": -0.0003431244404055178, "step": 120 }, { "epoch": 0.02, "eval_logits/chosen": -2.7423059940338135, "eval_logits/rejected": -2.735048770904541, "eval_logps/chosen": -332.7540283203125, "eval_logps/rejected": -301.1204833984375, "eval_loss": 0.6931224465370178, "eval_rewards/accuracies": 0.492000013589859, "eval_rewards/chosen": -4.632035779650323e-05, "eval_rewards/margins": 5.811014852952212e-05, "eval_rewards/rejected": -0.00010443051723996177, "eval_runtime": 197.0438, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 120 }, { "epoch": 0.02, "learning_rate": 8.496732026143792e-07, "logits/chosen": -3.0233044624328613, "logits/rejected": -3.0531599521636963, "logps/chosen": -311.4083557128906, "logps/rejected": -283.33258056640625, "loss": 0.6931, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -6.241817118279869e-06, "rewards/margins": 0.00016770794172771275, "rewards/rejected": -0.00017394970927853137, "step": 130 }, { "epoch": 0.02, "eval_logits/chosen": -2.742424249649048, "eval_logits/rejected": -2.735227346420288, "eval_logps/chosen": -332.7500305175781, "eval_logps/rejected": -301.1299133300781, "eval_loss": 0.6930555701255798, "eval_rewards/accuracies": 0.5084999799728394, "eval_rewards/chosen": -6.5807921600935515e-06, "eval_rewards/margins": 0.00019208044977858663, "eval_rewards/rejected": -0.00019866121874656528, "eval_runtime": 197.2122, "eval_samples_per_second": 10.141, "eval_steps_per_second": 5.071, "step": 130 }, { "epoch": 0.02, "learning_rate": 9.150326797385621e-07, "logits/chosen": -2.984133243560791, "logits/rejected": -2.9490771293640137, "logps/chosen": -327.62127685546875, "logps/rejected": -287.3335266113281, "loss": 0.6932, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.0002134219103027135, "rewards/margins": -4.010857082903385e-05, "rewards/rejected": -0.00017331326671410352, "step": 140 }, { "epoch": 0.02, "eval_logits/chosen": -2.742417812347412, "eval_logits/rejected": -2.7351322174072266, "eval_logps/chosen": -332.7521667480469, "eval_logps/rejected": -301.12445068359375, "eval_loss": 0.6930928826332092, "eval_rewards/accuracies": 0.4984999895095825, "eval_rewards/chosen": -2.7572192266234197e-05, "eval_rewards/margins": 0.00011697168520186096, "eval_rewards/rejected": -0.00014454391202889383, "eval_runtime": 196.8258, "eval_samples_per_second": 10.161, "eval_steps_per_second": 5.081, "step": 140 }, { "epoch": 0.02, "learning_rate": 9.80392156862745e-07, "logits/chosen": -3.0864033699035645, "logits/rejected": -3.026364803314209, "logps/chosen": -402.3047790527344, "logps/rejected": -355.61175537109375, "loss": 0.6925, "rewards/accuracies": 0.5, "rewards/chosen": 0.0004579071537591517, "rewards/margins": 0.0012378387618809938, "rewards/rejected": -0.0007799316081218421, "step": 150 }, { "epoch": 0.02, "eval_logits/chosen": -2.74202823638916, "eval_logits/rejected": -2.7348246574401855, "eval_logps/chosen": -332.7433776855469, "eval_logps/rejected": -301.11444091796875, "eval_loss": 0.6930994987487793, "eval_rewards/accuracies": 0.5015000104904175, "eval_rewards/chosen": 6.008195850881748e-05, "eval_rewards/margins": 0.00010428918903926387, "eval_rewards/rejected": -4.4207245082361624e-05, "eval_runtime": 197.2211, "eval_samples_per_second": 10.141, "eval_steps_per_second": 5.07, "step": 150 }, { "epoch": 0.02, "learning_rate": 1.0457516339869283e-06, "logits/chosen": -3.0945613384246826, "logits/rejected": -3.0280470848083496, "logps/chosen": -308.03509521484375, "logps/rejected": -273.6764221191406, "loss": 0.6931, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -9.9400152976159e-05, "rewards/margins": 0.0001606243458809331, "rewards/rejected": -0.0002600245352368802, "step": 160 }, { "epoch": 0.02, "eval_logits/chosen": -2.741826057434082, "eval_logits/rejected": -2.7345821857452393, "eval_logps/chosen": -332.755615234375, "eval_logps/rejected": -301.11944580078125, "eval_loss": 0.6931356191635132, "eval_rewards/accuracies": 0.4934999942779541, "eval_rewards/chosen": -6.196425965754315e-05, "eval_rewards/margins": 3.2061645470093936e-05, "eval_rewards/rejected": -9.402589057572186e-05, "eval_runtime": 196.7162, "eval_samples_per_second": 10.167, "eval_steps_per_second": 5.083, "step": 160 }, { "epoch": 0.02, "learning_rate": 1.111111111111111e-06, "logits/chosen": -3.0090110301971436, "logits/rejected": -3.0139718055725098, "logps/chosen": -300.4590148925781, "logps/rejected": -270.7416076660156, "loss": 0.693, "rewards/accuracies": 0.5625, "rewards/chosen": 2.0842790036113e-05, "rewards/margins": 0.0002311690041096881, "rewards/rejected": -0.0002103261649608612, "step": 170 }, { "epoch": 0.02, "eval_logits/chosen": -2.7421629428863525, "eval_logits/rejected": -2.7350175380706787, "eval_logps/chosen": -332.75689697265625, "eval_logps/rejected": -301.1203308105469, "eval_loss": 0.6931375861167908, "eval_rewards/accuracies": 0.4925000071525574, "eval_rewards/chosen": -7.511243893532082e-05, "eval_rewards/margins": 2.809734723996371e-05, "eval_rewards/rejected": -0.0001032097716233693, "eval_runtime": 196.9725, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 170 }, { "epoch": 0.02, "learning_rate": 1.1764705882352942e-06, "logits/chosen": -3.080929756164551, "logits/rejected": -2.997765302658081, "logps/chosen": -412.30596923828125, "logps/rejected": -311.81390380859375, "loss": 0.6938, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.0013919133925810456, "rewards/margins": -0.0012244291137903929, "rewards/rejected": -0.00016748439520597458, "step": 180 }, { "epoch": 0.02, "eval_logits/chosen": -2.742008924484253, "eval_logits/rejected": -2.734745979309082, "eval_logps/chosen": -332.7729797363281, "eval_logps/rejected": -301.13446044921875, "eval_loss": 0.693146824836731, "eval_rewards/accuracies": 0.5, "eval_rewards/chosen": -0.0002355735341552645, "eval_rewards/margins": 9.156420674116816e-06, "eval_rewards/rejected": -0.0002447299484629184, "eval_runtime": 196.9856, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 180 }, { "epoch": 0.02, "learning_rate": 1.2418300653594772e-06, "logits/chosen": -2.985764980316162, "logits/rejected": -2.968858480453491, "logps/chosen": -301.422607421875, "logps/rejected": -254.9043426513672, "loss": 0.6929, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.0002511686470825225, "rewards/margins": 0.00046358705731108785, "rewards/rejected": -0.000714755617082119, "step": 190 }, { "epoch": 0.02, "eval_logits/chosen": -2.742161273956299, "eval_logits/rejected": -2.7349143028259277, "eval_logps/chosen": -332.7518005371094, "eval_logps/rejected": -301.1442565917969, "eval_loss": 0.6929922699928284, "eval_rewards/accuracies": 0.5260000228881836, "eval_rewards/chosen": -2.4143202608684078e-05, "eval_rewards/margins": 0.00031821097945794463, "eval_rewards/rejected": -0.0003423541784286499, "eval_runtime": 196.8806, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 190 }, { "epoch": 0.03, "learning_rate": 1.3071895424836604e-06, "logits/chosen": -3.056624174118042, "logits/rejected": -3.0141055583953857, "logps/chosen": -298.284912109375, "logps/rejected": -298.84381103515625, "loss": 0.6924, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 9.34753697947599e-05, "rewards/margins": 0.0014477561926469207, "rewards/rejected": -0.0013542806264013052, "step": 200 }, { "epoch": 0.03, "eval_logits/chosen": -2.7422404289245605, "eval_logits/rejected": -2.7349939346313477, "eval_logps/chosen": -332.7804260253906, "eval_logps/rejected": -301.1587829589844, "eval_loss": 0.693062961101532, "eval_rewards/accuracies": 0.5040000081062317, "eval_rewards/chosen": -0.00031018684967420995, "eval_rewards/margins": 0.00017730562831275165, "eval_rewards/rejected": -0.00048749250709079206, "eval_runtime": 197.0551, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 200 }, { "epoch": 0.03, "learning_rate": 1.3725490196078434e-06, "logits/chosen": -3.103909730911255, "logits/rejected": -3.096003293991089, "logps/chosen": -335.46551513671875, "logps/rejected": -304.6739196777344, "loss": 0.6932, "rewards/accuracies": 0.5, "rewards/chosen": -0.000499956717249006, "rewards/margins": -8.5618878074456e-05, "rewards/rejected": -0.00041433790465816855, "step": 210 }, { "epoch": 0.03, "eval_logits/chosen": -2.7421951293945312, "eval_logits/rejected": -2.7350261211395264, "eval_logps/chosen": -332.7716979980469, "eval_logps/rejected": -301.14794921875, "eval_loss": 0.6930733919143677, "eval_rewards/accuracies": 0.5049999952316284, "eval_rewards/chosen": -0.00022285518934950233, "eval_rewards/margins": 0.0001561456301715225, "eval_rewards/rejected": -0.0003790008195210248, "eval_runtime": 196.773, "eval_samples_per_second": 10.164, "eval_steps_per_second": 5.082, "step": 210 }, { "epoch": 0.03, "learning_rate": 1.4379084967320261e-06, "logits/chosen": -3.006915330886841, "logits/rejected": -2.964789628982544, "logps/chosen": -329.61102294921875, "logps/rejected": -297.4481506347656, "loss": 0.6934, "rewards/accuracies": 0.5, "rewards/chosen": -0.000635097618214786, "rewards/margins": -0.0005649608210660517, "rewards/rejected": -7.013681170064956e-05, "step": 220 }, { "epoch": 0.03, "eval_logits/chosen": -2.741764783859253, "eval_logits/rejected": -2.734501600265503, "eval_logps/chosen": -332.7756042480469, "eval_logps/rejected": -301.16070556640625, "eval_loss": 0.6930290460586548, "eval_rewards/accuracies": 0.4964999854564667, "eval_rewards/chosen": -0.00026174308732151985, "eval_rewards/margins": 0.00024544313782826066, "eval_rewards/rejected": -0.0005071861669421196, "eval_runtime": 197.1268, "eval_samples_per_second": 10.146, "eval_steps_per_second": 5.073, "step": 220 }, { "epoch": 0.03, "learning_rate": 1.5032679738562091e-06, "logits/chosen": -2.974337577819824, "logits/rejected": -2.9849658012390137, "logps/chosen": -280.42303466796875, "logps/rejected": -321.53265380859375, "loss": 0.693, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.0003795806842390448, "rewards/margins": 0.00029520891257561743, "rewards/rejected": -0.000674789713229984, "step": 230 }, { "epoch": 0.03, "eval_logits/chosen": -2.742231845855713, "eval_logits/rejected": -2.7350893020629883, "eval_logps/chosen": -332.79168701171875, "eval_logps/rejected": -301.2016906738281, "eval_loss": 0.6929041147232056, "eval_rewards/accuracies": 0.5289999842643738, "eval_rewards/chosen": -0.0004225261218380183, "eval_rewards/margins": 0.0004941746010445058, "eval_rewards/rejected": -0.0009167007519863546, "eval_runtime": 197.2194, "eval_samples_per_second": 10.141, "eval_steps_per_second": 5.07, "step": 230 }, { "epoch": 0.03, "learning_rate": 1.5686274509803923e-06, "logits/chosen": -2.9993062019348145, "logits/rejected": -3.0187880992889404, "logps/chosen": -287.79766845703125, "logps/rejected": -308.9731750488281, "loss": 0.6928, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0008283854695037007, "rewards/margins": 0.0007049053674563766, "rewards/rejected": -0.0015332909533753991, "step": 240 }, { "epoch": 0.03, "eval_logits/chosen": -2.742602825164795, "eval_logits/rejected": -2.7353687286376953, "eval_logps/chosen": -332.8048095703125, "eval_logps/rejected": -301.19207763671875, "eval_loss": 0.6930183172225952, "eval_rewards/accuracies": 0.5174999833106995, "eval_rewards/chosen": -0.0005539001431316137, "eval_rewards/margins": 0.00026647234335541725, "eval_rewards/rejected": -0.000820372486487031, "eval_runtime": 197.1539, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 240 }, { "epoch": 0.03, "learning_rate": 1.6339869281045753e-06, "logits/chosen": -3.0508596897125244, "logits/rejected": -3.0222580432891846, "logps/chosen": -373.35943603515625, "logps/rejected": -316.1955871582031, "loss": 0.6935, "rewards/accuracies": 0.5, "rewards/chosen": -0.0007197518716566265, "rewards/margins": -0.000629595888312906, "rewards/rejected": -9.015606337925419e-05, "step": 250 }, { "epoch": 0.03, "eval_logits/chosen": -2.7422688007354736, "eval_logits/rejected": -2.735180139541626, "eval_logps/chosen": -332.8021240234375, "eval_logps/rejected": -301.2145080566406, "eval_loss": 0.6928929686546326, "eval_rewards/accuracies": 0.5370000004768372, "eval_rewards/chosen": -0.0005272579728625715, "eval_rewards/margins": 0.0005174549296498299, "eval_rewards/rejected": -0.0010447128443047404, "eval_runtime": 197.2346, "eval_samples_per_second": 10.14, "eval_steps_per_second": 5.07, "step": 250 }, { "epoch": 0.03, "learning_rate": 1.6993464052287585e-06, "logits/chosen": -3.073085308074951, "logits/rejected": -3.0740902423858643, "logps/chosen": -346.2541809082031, "logps/rejected": -297.8416748046875, "loss": 0.6934, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0008511870983056724, "rewards/margins": -0.0004448608378879726, "rewards/rejected": -0.0004063262604176998, "step": 260 }, { "epoch": 0.03, "eval_logits/chosen": -2.742314100265503, "eval_logits/rejected": -2.73513126373291, "eval_logps/chosen": -332.82208251953125, "eval_logps/rejected": -301.2392272949219, "eval_loss": 0.6928689479827881, "eval_rewards/accuracies": 0.5274999737739563, "eval_rewards/chosen": -0.0007265734602697194, "eval_rewards/margins": 0.0005655785789713264, "eval_rewards/rejected": -0.0012921523302793503, "eval_runtime": 196.9217, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 260 }, { "epoch": 0.04, "learning_rate": 1.7647058823529414e-06, "logits/chosen": -3.002138137817383, "logits/rejected": -2.992426872253418, "logps/chosen": -294.773681640625, "logps/rejected": -267.15521240234375, "loss": 0.6937, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.0016117170453071594, "rewards/margins": -0.0011218027211725712, "rewards/rejected": -0.0004899144405499101, "step": 270 }, { "epoch": 0.04, "eval_logits/chosen": -2.7421512603759766, "eval_logits/rejected": -2.734945774078369, "eval_logps/chosen": -332.82757568359375, "eval_logps/rejected": -301.2431335449219, "eval_loss": 0.692876935005188, "eval_rewards/accuracies": 0.5149999856948853, "eval_rewards/chosen": -0.0007814643904566765, "eval_rewards/margins": 0.0005501382402144372, "eval_rewards/rejected": -0.0013316025724634528, "eval_runtime": 196.9036, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 270 }, { "epoch": 0.04, "learning_rate": 1.8300653594771242e-06, "logits/chosen": -3.0527281761169434, "logits/rejected": -3.0102436542510986, "logps/chosen": -380.1999206542969, "logps/rejected": -336.00726318359375, "loss": 0.6925, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.0011029274901375175, "rewards/margins": 0.0013822071487084031, "rewards/rejected": -0.002485134406015277, "step": 280 }, { "epoch": 0.04, "eval_logits/chosen": -2.742446184158325, "eval_logits/rejected": -2.7353570461273193, "eval_logps/chosen": -332.843505859375, "eval_logps/rejected": -301.2621765136719, "eval_loss": 0.6928617358207703, "eval_rewards/accuracies": 0.5270000100135803, "eval_rewards/chosen": -0.0009410838829353452, "eval_rewards/margins": 0.0005805276450701058, "eval_rewards/rejected": -0.0015216115862131119, "eval_runtime": 197.0489, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 280 }, { "epoch": 0.04, "learning_rate": 1.8954248366013072e-06, "logits/chosen": -3.0255136489868164, "logits/rejected": -3.032275676727295, "logps/chosen": -337.3700256347656, "logps/rejected": -302.2608642578125, "loss": 0.6927, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0005425974959507585, "rewards/margins": 0.0008994883974082768, "rewards/rejected": -0.0014420859515666962, "step": 290 }, { "epoch": 0.04, "eval_logits/chosen": -2.741929292678833, "eval_logits/rejected": -2.734889030456543, "eval_logps/chosen": -332.8692626953125, "eval_logps/rejected": -301.28790283203125, "eval_loss": 0.6928617358207703, "eval_rewards/accuracies": 0.5320000052452087, "eval_rewards/chosen": -0.001198362559080124, "eval_rewards/margins": 0.0005806823610328138, "eval_rewards/rejected": -0.0017790448619052768, "eval_runtime": 197.0895, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 290 }, { "epoch": 0.04, "learning_rate": 1.96078431372549e-06, "logits/chosen": -3.123136281967163, "logits/rejected": -3.0728859901428223, "logps/chosen": -359.8968200683594, "logps/rejected": -293.3435974121094, "loss": 0.693, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.0015214609447866678, "rewards/margins": 0.0003471664385870099, "rewards/rejected": -0.001868627266958356, "step": 300 }, { "epoch": 0.04, "eval_logits/chosen": -2.7420578002929688, "eval_logits/rejected": -2.734994411468506, "eval_logps/chosen": -332.8953857421875, "eval_logps/rejected": -301.3324890136719, "eval_loss": 0.6927695870399475, "eval_rewards/accuracies": 0.5394999980926514, "eval_rewards/chosen": -0.001459623803384602, "eval_rewards/margins": 0.000765010598115623, "eval_rewards/rejected": -0.002224634401500225, "eval_runtime": 197.096, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.074, "step": 300 }, { "epoch": 0.04, "learning_rate": 2.0261437908496734e-06, "logits/chosen": -2.9720630645751953, "logits/rejected": -2.9839987754821777, "logps/chosen": -356.2770080566406, "logps/rejected": -334.4881286621094, "loss": 0.6922, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.0012176050804555416, "rewards/margins": 0.0018635308369994164, "rewards/rejected": -0.003081135917454958, "step": 310 }, { "epoch": 0.04, "eval_logits/chosen": -2.7417304515838623, "eval_logits/rejected": -2.7347323894500732, "eval_logps/chosen": -332.9208068847656, "eval_logps/rejected": -301.3729248046875, "eval_loss": 0.692695140838623, "eval_rewards/accuracies": 0.5509999990463257, "eval_rewards/chosen": -0.0017142510041594505, "eval_rewards/margins": 0.0009148998069576919, "eval_rewards/rejected": -0.0026291508693248034, "eval_runtime": 197.0911, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 310 }, { "epoch": 0.04, "learning_rate": 2.0915032679738565e-06, "logits/chosen": -3.0705294609069824, "logits/rejected": -3.030722141265869, "logps/chosen": -320.76031494140625, "logps/rejected": -293.1005859375, "loss": 0.693, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.002190458821132779, "rewards/margins": 0.0002239603054476902, "rewards/rejected": -0.00241441885009408, "step": 320 }, { "epoch": 0.04, "eval_logits/chosen": -2.7417984008789062, "eval_logits/rejected": -2.734755039215088, "eval_logps/chosen": -332.95001220703125, "eval_logps/rejected": -301.4145202636719, "eval_loss": 0.6926332712173462, "eval_rewards/accuracies": 0.5584999918937683, "eval_rewards/chosen": -0.002005940768867731, "eval_rewards/margins": 0.0010391019750386477, "eval_rewards/rejected": -0.003045042511075735, "eval_runtime": 196.943, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.078, "step": 320 }, { "epoch": 0.04, "learning_rate": 2.1568627450980393e-06, "logits/chosen": -3.1108169555664062, "logits/rejected": -3.049923896789551, "logps/chosen": -356.15911865234375, "logps/rejected": -295.27740478515625, "loss": 0.6927, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0020984704606235027, "rewards/margins": 0.000873108278028667, "rewards/rejected": -0.0029715788550674915, "step": 330 }, { "epoch": 0.04, "eval_logits/chosen": -2.741525650024414, "eval_logits/rejected": -2.734499454498291, "eval_logps/chosen": -332.9777526855469, "eval_logps/rejected": -301.4495544433594, "eval_loss": 0.6925971508026123, "eval_rewards/accuracies": 0.5519999861717224, "eval_rewards/chosen": -0.002283054403960705, "eval_rewards/margins": 0.0011124503798782825, "eval_rewards/rejected": -0.0033955047838389874, "eval_runtime": 197.0557, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 330 }, { "epoch": 0.04, "learning_rate": 2.222222222222222e-06, "logits/chosen": -3.033808469772339, "logits/rejected": -2.980825185775757, "logps/chosen": -319.2254333496094, "logps/rejected": -262.8683166503906, "loss": 0.6924, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.0029052558820694685, "rewards/margins": 0.0014379608910530806, "rewards/rejected": -0.004343216773122549, "step": 340 }, { "epoch": 0.04, "eval_logits/chosen": -2.7416186332702637, "eval_logits/rejected": -2.734678268432617, "eval_logps/chosen": -333.0244445800781, "eval_logps/rejected": -301.525634765625, "eval_loss": 0.6924512386322021, "eval_rewards/accuracies": 0.5600000023841858, "eval_rewards/chosen": -0.0027503310702741146, "eval_rewards/margins": 0.0014057998778298497, "eval_rewards/rejected": -0.00415613129734993, "eval_runtime": 197.052, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 340 }, { "epoch": 0.05, "learning_rate": 2.2875816993464053e-06, "logits/chosen": -3.092961072921753, "logits/rejected": -3.0653040409088135, "logps/chosen": -388.9897155761719, "logps/rejected": -309.00787353515625, "loss": 0.6921, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.0030721924267709255, "rewards/margins": 0.002087064553052187, "rewards/rejected": -0.0051592574454844, "step": 350 }, { "epoch": 0.05, "eval_logits/chosen": -2.741395950317383, "eval_logits/rejected": -2.73449444770813, "eval_logps/chosen": -333.0765380859375, "eval_logps/rejected": -301.61993408203125, "eval_loss": 0.6922417283058167, "eval_rewards/accuracies": 0.5724999904632568, "eval_rewards/chosen": -0.0032712086103856564, "eval_rewards/margins": 0.0018283347599208355, "eval_rewards/rejected": -0.0050995429046452045, "eval_runtime": 196.9644, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 350 }, { "epoch": 0.05, "learning_rate": 2.3529411764705885e-06, "logits/chosen": -3.0397391319274902, "logits/rejected": -2.995060682296753, "logps/chosen": -311.85931396484375, "logps/rejected": -305.2904968261719, "loss": 0.6918, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.003052308689802885, "rewards/margins": 0.002707479055970907, "rewards/rejected": -0.005759787745773792, "step": 360 }, { "epoch": 0.05, "eval_logits/chosen": -2.7416577339172363, "eval_logits/rejected": -2.7348086833953857, "eval_logps/chosen": -333.14324951171875, "eval_logps/rejected": -301.695556640625, "eval_loss": 0.6921982169151306, "eval_rewards/accuracies": 0.5835000276565552, "eval_rewards/chosen": -0.003938698675483465, "eval_rewards/margins": 0.0019165691919624805, "eval_rewards/rejected": -0.005855268333107233, "eval_runtime": 197.1033, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.073, "step": 360 }, { "epoch": 0.05, "learning_rate": 2.4183006535947716e-06, "logits/chosen": -2.9846248626708984, "logits/rejected": -3.012056350708008, "logps/chosen": -320.0126647949219, "logps/rejected": -288.37353515625, "loss": 0.6918, "rewards/accuracies": 0.6875, "rewards/chosen": -0.004556222353130579, "rewards/margins": 0.0027006464079022408, "rewards/rejected": -0.007256869226694107, "step": 370 }, { "epoch": 0.05, "eval_logits/chosen": -2.7418692111968994, "eval_logits/rejected": -2.73514461517334, "eval_logps/chosen": -333.22711181640625, "eval_logps/rejected": -301.8189392089844, "eval_loss": 0.6920028328895569, "eval_rewards/accuracies": 0.5924999713897705, "eval_rewards/chosen": -0.0047774785198271275, "eval_rewards/margins": 0.00231174030341208, "eval_rewards/rejected": -0.007089219056069851, "eval_runtime": 197.0339, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 370 }, { "epoch": 0.05, "learning_rate": 2.4836601307189544e-06, "logits/chosen": -3.0387444496154785, "logits/rejected": -3.033735752105713, "logps/chosen": -344.07342529296875, "logps/rejected": -296.765380859375, "loss": 0.6905, "rewards/accuracies": 0.625, "rewards/chosen": -0.003909797873347998, "rewards/margins": 0.0054032644256949425, "rewards/rejected": -0.009313062764704227, "step": 380 }, { "epoch": 0.05, "eval_logits/chosen": -2.7420990467071533, "eval_logits/rejected": -2.735419511795044, "eval_logps/chosen": -333.339599609375, "eval_logps/rejected": -301.9618835449219, "eval_loss": 0.6918540596961975, "eval_rewards/accuracies": 0.5855000019073486, "eval_rewards/chosen": -0.005901523865759373, "eval_rewards/margins": 0.002617142628878355, "eval_rewards/rejected": -0.00851866602897644, "eval_runtime": 196.9144, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.078, "step": 380 }, { "epoch": 0.05, "learning_rate": 2.549019607843137e-06, "logits/chosen": -2.9668126106262207, "logits/rejected": -2.9344594478607178, "logps/chosen": -352.40155029296875, "logps/rejected": -289.9767150878906, "loss": 0.691, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.005256765056401491, "rewards/margins": 0.004349336959421635, "rewards/rejected": -0.009606102481484413, "step": 390 }, { "epoch": 0.05, "eval_logits/chosen": -2.7425131797790527, "eval_logits/rejected": -2.735957622528076, "eval_logps/chosen": -333.4939880371094, "eval_logps/rejected": -302.1752624511719, "eval_loss": 0.6915651559829712, "eval_rewards/accuracies": 0.5924999713897705, "eval_rewards/chosen": -0.0074457875452935696, "eval_rewards/margins": 0.003206492168828845, "eval_rewards/rejected": -0.010652278549969196, "eval_runtime": 196.9498, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 390 }, { "epoch": 0.05, "learning_rate": 2.6143790849673208e-06, "logits/chosen": -3.0899410247802734, "logits/rejected": -3.1250669956207275, "logps/chosen": -339.81781005859375, "logps/rejected": -345.57635498046875, "loss": 0.6929, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.008469333872199059, "rewards/margins": 0.0005970595520921052, "rewards/rejected": -0.009066394530236721, "step": 400 }, { "epoch": 0.05, "eval_logits/chosen": -2.742565155029297, "eval_logits/rejected": -2.7361464500427246, "eval_logps/chosen": -333.64605712890625, "eval_logps/rejected": -302.40118408203125, "eval_loss": 0.6912031173706055, "eval_rewards/accuracies": 0.6004999876022339, "eval_rewards/chosen": -0.008966467343270779, "eval_rewards/margins": 0.0039451997727155685, "eval_rewards/rejected": -0.012911667115986347, "eval_runtime": 196.9145, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.078, "step": 400 }, { "epoch": 0.05, "learning_rate": 2.6797385620915036e-06, "logits/chosen": -3.0014090538024902, "logits/rejected": -2.987090587615967, "logps/chosen": -300.54693603515625, "logps/rejected": -255.03335571289062, "loss": 0.6911, "rewards/accuracies": 0.5625, "rewards/chosen": -0.010720537044107914, "rewards/margins": 0.004079930018633604, "rewards/rejected": -0.01480046845972538, "step": 410 }, { "epoch": 0.05, "eval_logits/chosen": -2.7424871921539307, "eval_logits/rejected": -2.7361936569213867, "eval_logps/chosen": -333.7640380859375, "eval_logps/rejected": -302.5548095703125, "eval_loss": 0.6910296678543091, "eval_rewards/accuracies": 0.6000000238418579, "eval_rewards/chosen": -0.01014601718634367, "eval_rewards/margins": 0.004301996435970068, "eval_rewards/rejected": -0.01444801315665245, "eval_runtime": 197.0865, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 410 }, { "epoch": 0.05, "learning_rate": 2.7450980392156867e-06, "logits/chosen": -3.0133562088012695, "logits/rejected": -2.994236469268799, "logps/chosen": -336.31365966796875, "logps/rejected": -323.908935546875, "loss": 0.6914, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.010945516638457775, "rewards/margins": 0.0036502934526652098, "rewards/rejected": -0.014595809392631054, "step": 420 }, { "epoch": 0.05, "eval_logits/chosen": -2.742440938949585, "eval_logits/rejected": -2.7361104488372803, "eval_logps/chosen": -333.8959655761719, "eval_logps/rejected": -302.7467041015625, "eval_loss": 0.6907373070716858, "eval_rewards/accuracies": 0.6044999957084656, "eval_rewards/chosen": -0.011465570889413357, "eval_rewards/margins": 0.004901566542685032, "eval_rewards/rejected": -0.01636713556945324, "eval_runtime": 197.0399, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 420 }, { "epoch": 0.06, "learning_rate": 2.8104575163398695e-06, "logits/chosen": -3.0821690559387207, "logits/rejected": -3.0689940452575684, "logps/chosen": -339.2743225097656, "logps/rejected": -310.23028564453125, "loss": 0.6921, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.014210025779902935, "rewards/margins": 0.002133010420948267, "rewards/rejected": -0.01634303480386734, "step": 430 }, { "epoch": 0.06, "eval_logits/chosen": -2.7430331707000732, "eval_logits/rejected": -2.7368240356445312, "eval_logps/chosen": -334.0163269042969, "eval_logps/rejected": -302.885498046875, "eval_loss": 0.6906515955924988, "eval_rewards/accuracies": 0.590499997138977, "eval_rewards/chosen": -0.012668982148170471, "eval_rewards/margins": 0.005086148623377085, "eval_rewards/rejected": -0.017755132168531418, "eval_runtime": 196.9624, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 430 }, { "epoch": 0.06, "learning_rate": 2.8758169934640523e-06, "logits/chosen": -3.0886878967285156, "logits/rejected": -3.089543342590332, "logps/chosen": -328.46746826171875, "logps/rejected": -295.8978576660156, "loss": 0.6918, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.013683220371603966, "rewards/margins": 0.0028565835673362017, "rewards/rejected": -0.01653980277478695, "step": 440 }, { "epoch": 0.06, "eval_logits/chosen": -2.7425713539123535, "eval_logits/rejected": -2.736445426940918, "eval_logps/chosen": -334.0881652832031, "eval_logps/rejected": -303.01953125, "eval_loss": 0.6903461813926697, "eval_rewards/accuracies": 0.6004999876022339, "eval_rewards/chosen": -0.013387652114033699, "eval_rewards/margins": 0.005707699339836836, "eval_rewards/rejected": -0.019095350056886673, "eval_runtime": 196.9075, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 440 }, { "epoch": 0.06, "learning_rate": 2.9411764705882355e-06, "logits/chosen": -3.083773136138916, "logits/rejected": -3.0679244995117188, "logps/chosen": -336.1562194824219, "logps/rejected": -342.18817138671875, "loss": 0.6928, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.017543237656354904, "rewards/margins": 0.0008635501144453883, "rewards/rejected": -0.018406789749860764, "step": 450 }, { "epoch": 0.06, "eval_logits/chosen": -2.7423670291900635, "eval_logits/rejected": -2.7363510131835938, "eval_logps/chosen": -334.1518249511719, "eval_logps/rejected": -303.076416015625, "eval_loss": 0.6903823018074036, "eval_rewards/accuracies": 0.6060000061988831, "eval_rewards/chosen": -0.014023885130882263, "eval_rewards/margins": 0.00563990930095315, "eval_rewards/rejected": -0.019663793966174126, "eval_runtime": 196.8415, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 450 }, { "epoch": 0.06, "learning_rate": 3.0065359477124182e-06, "logits/chosen": -3.0067667961120605, "logits/rejected": -2.99423885345459, "logps/chosen": -310.811767578125, "logps/rejected": -287.95330810546875, "loss": 0.6894, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.014639434404671192, "rewards/margins": 0.007508780807256699, "rewards/rejected": -0.022148214280605316, "step": 460 }, { "epoch": 0.06, "eval_logits/chosen": -2.742067575454712, "eval_logits/rejected": -2.736116409301758, "eval_logps/chosen": -334.324951171875, "eval_logps/rejected": -303.3060607910156, "eval_loss": 0.6901097297668457, "eval_rewards/accuracies": 0.6035000085830688, "eval_rewards/chosen": -0.01575511507689953, "eval_rewards/margins": 0.006205403245985508, "eval_rewards/rejected": -0.021960517391562462, "eval_runtime": 197.0302, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 460 }, { "epoch": 0.06, "learning_rate": 3.071895424836602e-06, "logits/chosen": -3.0282609462738037, "logits/rejected": -3.040301561355591, "logps/chosen": -332.8352966308594, "logps/rejected": -307.71649169921875, "loss": 0.6894, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.013653397560119629, "rewards/margins": 0.0075346939265728, "rewards/rejected": -0.02118808962404728, "step": 470 }, { "epoch": 0.06, "eval_logits/chosen": -2.741609811782837, "eval_logits/rejected": -2.735790967941284, "eval_logps/chosen": -334.572021484375, "eval_logps/rejected": -303.6496276855469, "eval_loss": 0.6896440386772156, "eval_rewards/accuracies": 0.6019999980926514, "eval_rewards/chosen": -0.018226245418190956, "eval_rewards/margins": 0.007169577293097973, "eval_rewards/rejected": -0.025395819917321205, "eval_runtime": 197.0911, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 470 }, { "epoch": 0.06, "learning_rate": 3.1372549019607846e-06, "logits/chosen": -3.0256853103637695, "logits/rejected": -2.999748706817627, "logps/chosen": -343.3094482421875, "logps/rejected": -283.9912414550781, "loss": 0.6896, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.017795979976654053, "rewards/margins": 0.007141630165278912, "rewards/rejected": -0.02493760921061039, "step": 480 }, { "epoch": 0.06, "eval_logits/chosen": -2.7408573627471924, "eval_logits/rejected": -2.735177755355835, "eval_logps/chosen": -334.8725280761719, "eval_logps/rejected": -304.0498962402344, "eval_loss": 0.6891666054725647, "eval_rewards/accuracies": 0.6104999780654907, "eval_rewards/chosen": -0.021231109276413918, "eval_rewards/margins": 0.00816798210144043, "eval_rewards/rejected": -0.029399089515209198, "eval_runtime": 196.9503, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 480 }, { "epoch": 0.06, "learning_rate": 3.2026143790849674e-06, "logits/chosen": -3.0702672004699707, "logits/rejected": -3.0584235191345215, "logps/chosen": -322.9806823730469, "logps/rejected": -261.50433349609375, "loss": 0.6878, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.025627601891756058, "rewards/margins": 0.010860485024750233, "rewards/rejected": -0.03648808225989342, "step": 490 }, { "epoch": 0.06, "eval_logits/chosen": -2.740217447280884, "eval_logits/rejected": -2.7347195148468018, "eval_logps/chosen": -335.2659606933594, "eval_logps/rejected": -304.5755920410156, "eval_loss": 0.6885358095169067, "eval_rewards/accuracies": 0.6079999804496765, "eval_rewards/chosen": -0.025165580213069916, "eval_rewards/margins": 0.009489987045526505, "eval_rewards/rejected": -0.03465556725859642, "eval_runtime": 197.049, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 490 }, { "epoch": 0.07, "learning_rate": 3.2679738562091506e-06, "logits/chosen": -2.9991683959960938, "logits/rejected": -2.995518922805786, "logps/chosen": -305.4998474121094, "logps/rejected": -274.02459716796875, "loss": 0.6893, "rewards/accuracies": 0.625, "rewards/chosen": -0.027515623718500137, "rewards/margins": 0.00802281778305769, "rewards/rejected": -0.035538434982299805, "step": 500 }, { "epoch": 0.07, "eval_logits/chosen": -2.7397196292877197, "eval_logits/rejected": -2.734415292739868, "eval_logps/chosen": -335.75225830078125, "eval_logps/rejected": -305.1786193847656, "eval_loss": 0.6879965662956238, "eval_rewards/accuracies": 0.6144999861717224, "eval_rewards/chosen": -0.030028536915779114, "eval_rewards/margins": 0.010657698847353458, "eval_rewards/rejected": -0.040686242282390594, "eval_runtime": 197.0059, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 500 }, { "epoch": 0.07, "learning_rate": 3.3333333333333333e-06, "logits/chosen": -2.9843194484710693, "logits/rejected": -2.99456524848938, "logps/chosen": -302.5033264160156, "logps/rejected": -304.94403076171875, "loss": 0.6898, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.03312421962618828, "rewards/margins": 0.007043605204671621, "rewards/rejected": -0.040167830884456635, "step": 510 }, { "epoch": 0.07, "eval_logits/chosen": -2.7393970489501953, "eval_logits/rejected": -2.734170436859131, "eval_logps/chosen": -336.0675048828125, "eval_logps/rejected": -305.600830078125, "eval_loss": 0.6874927282333374, "eval_rewards/accuracies": 0.6169999837875366, "eval_rewards/chosen": -0.03318093344569206, "eval_rewards/margins": 0.011727489531040192, "eval_rewards/rejected": -0.044908422976732254, "eval_runtime": 197.0533, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 510 }, { "epoch": 0.07, "learning_rate": 3.398692810457517e-06, "logits/chosen": -2.9906728267669678, "logits/rejected": -2.9100711345672607, "logps/chosen": -305.5321960449219, "logps/rejected": -309.1561584472656, "loss": 0.6869, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.03266731649637222, "rewards/margins": 0.012893171980977058, "rewards/rejected": -0.04556048661470413, "step": 520 }, { "epoch": 0.07, "eval_logits/chosen": -2.7389254570007324, "eval_logits/rejected": -2.7339720726013184, "eval_logps/chosen": -336.4316711425781, "eval_logps/rejected": -306.0887451171875, "eval_loss": 0.6869123578071594, "eval_rewards/accuracies": 0.6134999990463257, "eval_rewards/chosen": -0.03682265803217888, "eval_rewards/margins": 0.012964564375579357, "eval_rewards/rejected": -0.04978722333908081, "eval_runtime": 196.9405, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.078, "step": 520 }, { "epoch": 0.07, "learning_rate": 3.4640522875816997e-06, "logits/chosen": -3.0058653354644775, "logits/rejected": -3.0059714317321777, "logps/chosen": -305.9553527832031, "logps/rejected": -276.55523681640625, "loss": 0.686, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.04011048376560211, "rewards/margins": 0.014879104681313038, "rewards/rejected": -0.054989587515592575, "step": 530 }, { "epoch": 0.07, "eval_logits/chosen": -2.7384033203125, "eval_logits/rejected": -2.733745574951172, "eval_logps/chosen": -336.9488525390625, "eval_logps/rejected": -306.7417907714844, "eval_loss": 0.6862883567810059, "eval_rewards/accuracies": 0.6150000095367432, "eval_rewards/chosen": -0.0419941246509552, "eval_rewards/margins": 0.014323660172522068, "eval_rewards/rejected": -0.05631778761744499, "eval_runtime": 197.1253, "eval_samples_per_second": 10.146, "eval_steps_per_second": 5.073, "step": 530 }, { "epoch": 0.07, "learning_rate": 3.529411764705883e-06, "logits/chosen": -3.044787883758545, "logits/rejected": -3.0278046131134033, "logps/chosen": -347.0563659667969, "logps/rejected": -308.2681884765625, "loss": 0.6835, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.040227990597486496, "rewards/margins": 0.01987212896347046, "rewards/rejected": -0.060100119560956955, "step": 540 }, { "epoch": 0.07, "eval_logits/chosen": -2.738213062286377, "eval_logits/rejected": -2.7338826656341553, "eval_logps/chosen": -337.69158935546875, "eval_logps/rejected": -307.684814453125, "eval_loss": 0.6853721737861633, "eval_rewards/accuracies": 0.6115000247955322, "eval_rewards/chosen": -0.04942203685641289, "eval_rewards/margins": 0.016325712203979492, "eval_rewards/rejected": -0.06574775278568268, "eval_runtime": 196.8498, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 540 }, { "epoch": 0.07, "learning_rate": 3.5947712418300657e-06, "logits/chosen": -3.015363931655884, "logits/rejected": -2.9994001388549805, "logps/chosen": -309.4760437011719, "logps/rejected": -280.54541015625, "loss": 0.6832, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05051179602742195, "rewards/margins": 0.020926663652062416, "rewards/rejected": -0.07143845409154892, "step": 550 }, { "epoch": 0.07, "eval_logits/chosen": -2.737447738647461, "eval_logits/rejected": -2.7334887981414795, "eval_logps/chosen": -338.6698913574219, "eval_logps/rejected": -308.8882751464844, "eval_loss": 0.6843726634979248, "eval_rewards/accuracies": 0.6115000247955322, "eval_rewards/chosen": -0.05920499563217163, "eval_rewards/margins": 0.018577815964818, "eval_rewards/rejected": -0.07778280973434448, "eval_runtime": 197.1006, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.074, "step": 550 }, { "epoch": 0.07, "learning_rate": 3.6601307189542484e-06, "logits/chosen": -3.0518956184387207, "logits/rejected": -3.001509666442871, "logps/chosen": -364.0932312011719, "logps/rejected": -352.0817565917969, "loss": 0.6788, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.062454767525196075, "rewards/margins": 0.03078216314315796, "rewards/rejected": -0.09323693811893463, "step": 560 }, { "epoch": 0.07, "eval_logits/chosen": -2.736476182937622, "eval_logits/rejected": -2.7330868244171143, "eval_logps/chosen": -339.8880615234375, "eval_logps/rejected": -310.3548889160156, "eval_loss": 0.6832955479621887, "eval_rewards/accuracies": 0.6150000095367432, "eval_rewards/chosen": -0.0713866651058197, "eval_rewards/margins": 0.021062159910798073, "eval_rewards/rejected": -0.09244882315397263, "eval_runtime": 196.7818, "eval_samples_per_second": 10.164, "eval_steps_per_second": 5.082, "step": 560 }, { "epoch": 0.07, "learning_rate": 3.7254901960784316e-06, "logits/chosen": -3.0795345306396484, "logits/rejected": -3.0690414905548096, "logps/chosen": -329.28558349609375, "logps/rejected": -307.31707763671875, "loss": 0.6815, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.0798201709985733, "rewards/margins": 0.024440856650471687, "rewards/rejected": -0.10426102578639984, "step": 570 }, { "epoch": 0.07, "eval_logits/chosen": -2.7351739406585693, "eval_logits/rejected": -2.732409954071045, "eval_logps/chosen": -341.1984558105469, "eval_logps/rejected": -311.93963623046875, "eval_loss": 0.6821067929267883, "eval_rewards/accuracies": 0.6075000166893005, "eval_rewards/chosen": -0.08449088037014008, "eval_rewards/margins": 0.02380536124110222, "eval_rewards/rejected": -0.10829625278711319, "eval_runtime": 196.9602, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 570 }, { "epoch": 0.08, "learning_rate": 3.7908496732026144e-06, "logits/chosen": -3.0217106342315674, "logits/rejected": -2.97151255607605, "logps/chosen": -386.537109375, "logps/rejected": -340.64154052734375, "loss": 0.678, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.08288892358541489, "rewards/margins": 0.032978884875774384, "rewards/rejected": -0.11586780846118927, "step": 580 }, { "epoch": 0.08, "eval_logits/chosen": -2.7337937355041504, "eval_logits/rejected": -2.7314746379852295, "eval_logps/chosen": -342.7324523925781, "eval_logps/rejected": -313.7508850097656, "eval_loss": 0.6809699535369873, "eval_rewards/accuracies": 0.6069999933242798, "eval_rewards/chosen": -0.0998305007815361, "eval_rewards/margins": 0.026577942073345184, "eval_rewards/rejected": -0.1264084428548813, "eval_runtime": 196.9667, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 580 }, { "epoch": 0.08, "learning_rate": 3.856209150326798e-06, "logits/chosen": -3.018777847290039, "logits/rejected": -2.9784789085388184, "logps/chosen": -346.2818298339844, "logps/rejected": -310.3592529296875, "loss": 0.6724, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09781802445650101, "rewards/margins": 0.0433654822409153, "rewards/rejected": -0.1411834955215454, "step": 590 }, { "epoch": 0.08, "eval_logits/chosen": -2.7325494289398193, "eval_logits/rejected": -2.7310123443603516, "eval_logps/chosen": -344.8931579589844, "eval_logps/rejected": -316.2652587890625, "eval_loss": 0.6795624494552612, "eval_rewards/accuracies": 0.5960000157356262, "eval_rewards/chosen": -0.12143778055906296, "eval_rewards/margins": 0.030114755034446716, "eval_rewards/rejected": -0.15155255794525146, "eval_runtime": 196.8786, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.079, "step": 590 }, { "epoch": 0.08, "learning_rate": 3.92156862745098e-06, "logits/chosen": -2.968508720397949, "logits/rejected": -2.948955535888672, "logps/chosen": -309.74383544921875, "logps/rejected": -293.50933837890625, "loss": 0.686, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.1398414522409439, "rewards/margins": 0.01775265857577324, "rewards/rejected": -0.15759411454200745, "step": 600 }, { "epoch": 0.08, "eval_logits/chosen": -2.730149030685425, "eval_logits/rejected": -2.7294833660125732, "eval_logps/chosen": -347.0211181640625, "eval_logps/rejected": -318.7592468261719, "eval_loss": 0.6780930757522583, "eval_rewards/accuracies": 0.6000000238418579, "eval_rewards/chosen": -0.1427169293165207, "eval_rewards/margins": 0.033775582909584045, "eval_rewards/rejected": -0.17649252712726593, "eval_runtime": 197.0711, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.074, "step": 600 }, { "epoch": 0.08, "learning_rate": 3.986928104575164e-06, "logits/chosen": -3.0006091594696045, "logits/rejected": -2.957949161529541, "logps/chosen": -296.22698974609375, "logps/rejected": -255.9561767578125, "loss": 0.6694, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.12925231456756592, "rewards/margins": 0.05154203251004219, "rewards/rejected": -0.18079432845115662, "step": 610 }, { "epoch": 0.08, "eval_logits/chosen": -2.7311341762542725, "eval_logits/rejected": -2.731069564819336, "eval_logps/chosen": -348.4164733886719, "eval_logps/rejected": -320.3974304199219, "eval_loss": 0.677168607711792, "eval_rewards/accuracies": 0.6025000214576721, "eval_rewards/chosen": -0.15667042136192322, "eval_rewards/margins": 0.03620406240224838, "eval_rewards/rejected": -0.192874476313591, "eval_runtime": 197.0649, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.074, "step": 610 }, { "epoch": 0.08, "learning_rate": 4.052287581699347e-06, "logits/chosen": -2.987593173980713, "logits/rejected": -2.9816346168518066, "logps/chosen": -366.7933044433594, "logps/rejected": -338.0000915527344, "loss": 0.6606, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16044440865516663, "rewards/margins": 0.07138291746377945, "rewards/rejected": -0.23182733356952667, "step": 620 }, { "epoch": 0.08, "eval_logits/chosen": -2.731865406036377, "eval_logits/rejected": -2.7336010932922363, "eval_logps/chosen": -353.2502746582031, "eval_logps/rejected": -325.7889404296875, "eval_loss": 0.6752864718437195, "eval_rewards/accuracies": 0.6079999804496765, "eval_rewards/chosen": -0.20500893890857697, "eval_rewards/margins": 0.04177996888756752, "eval_rewards/rejected": -0.2467889040708542, "eval_runtime": 197.1487, "eval_samples_per_second": 10.145, "eval_steps_per_second": 5.072, "step": 620 }, { "epoch": 0.08, "learning_rate": 4.11764705882353e-06, "logits/chosen": -2.906384229660034, "logits/rejected": -2.9101319313049316, "logps/chosen": -342.6524353027344, "logps/rejected": -319.92333984375, "loss": 0.668, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22237953543663025, "rewards/margins": 0.05616650730371475, "rewards/rejected": -0.2785460650920868, "step": 630 }, { "epoch": 0.08, "eval_logits/chosen": -2.726879358291626, "eval_logits/rejected": -2.730696678161621, "eval_logps/chosen": -358.658935546875, "eval_logps/rejected": -331.7452087402344, "eval_loss": 0.6736900210380554, "eval_rewards/accuracies": 0.6019999980926514, "eval_rewards/chosen": -0.25909480452537537, "eval_rewards/margins": 0.0472571887075901, "eval_rewards/rejected": -0.30635198950767517, "eval_runtime": 196.8854, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 630 }, { "epoch": 0.08, "learning_rate": 4.183006535947713e-06, "logits/chosen": -3.0391154289245605, "logits/rejected": -3.0323967933654785, "logps/chosen": -351.0347595214844, "logps/rejected": -331.3139343261719, "loss": 0.6781, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.28845497965812683, "rewards/margins": 0.03852443769574165, "rewards/rejected": -0.3269794285297394, "step": 640 }, { "epoch": 0.08, "eval_logits/chosen": -2.716712236404419, "eval_logits/rejected": -2.722219228744507, "eval_logps/chosen": -363.30462646484375, "eval_logps/rejected": -336.9739074707031, "eval_loss": 0.6720592975616455, "eval_rewards/accuracies": 0.6075000166893005, "eval_rewards/chosen": -0.3055519461631775, "eval_rewards/margins": 0.05308679863810539, "eval_rewards/rejected": -0.358638733625412, "eval_runtime": 196.902, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 640 }, { "epoch": 0.09, "learning_rate": 4.2483660130718954e-06, "logits/chosen": -2.966703414916992, "logits/rejected": -2.988459587097168, "logps/chosen": -348.847900390625, "logps/rejected": -336.23443603515625, "loss": 0.6732, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.29965394735336304, "rewards/margins": 0.05004773288965225, "rewards/rejected": -0.3497017025947571, "step": 650 }, { "epoch": 0.09, "eval_logits/chosen": -2.7116191387176514, "eval_logits/rejected": -2.7170767784118652, "eval_logps/chosen": -361.92413330078125, "eval_logps/rejected": -335.84771728515625, "eval_loss": 0.6707616448402405, "eval_rewards/accuracies": 0.609499990940094, "eval_rewards/chosen": -0.2917468845844269, "eval_rewards/margins": 0.05562999099493027, "eval_rewards/rejected": -0.34737691283226013, "eval_runtime": 196.9115, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.078, "step": 650 }, { "epoch": 0.09, "learning_rate": 4.313725490196079e-06, "logits/chosen": -2.990095615386963, "logits/rejected": -2.948988914489746, "logps/chosen": -390.239990234375, "logps/rejected": -324.51409912109375, "loss": 0.6768, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.28673607110977173, "rewards/margins": 0.044372208416461945, "rewards/rejected": -0.3311082720756531, "step": 660 }, { "epoch": 0.09, "eval_logits/chosen": -2.7104814052581787, "eval_logits/rejected": -2.7157156467437744, "eval_logps/chosen": -361.3223876953125, "eval_logps/rejected": -335.4339904785156, "eval_loss": 0.6699734330177307, "eval_rewards/accuracies": 0.6129999756813049, "eval_rewards/chosen": -0.2857293486595154, "eval_rewards/margins": 0.057510748505592346, "eval_rewards/rejected": -0.3432401120662689, "eval_runtime": 196.9573, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 660 }, { "epoch": 0.09, "learning_rate": 4.379084967320262e-06, "logits/chosen": -3.0028021335601807, "logits/rejected": -2.9637579917907715, "logps/chosen": -373.4584655761719, "logps/rejected": -390.0169677734375, "loss": 0.667, "rewards/accuracies": 0.5625, "rewards/chosen": -0.27895650267601013, "rewards/margins": 0.06341058760881424, "rewards/rejected": -0.34236711263656616, "step": 670 }, { "epoch": 0.09, "eval_logits/chosen": -2.7081527709960938, "eval_logits/rejected": -2.7128231525421143, "eval_logps/chosen": -361.0499572753906, "eval_logps/rejected": -335.34429931640625, "eval_loss": 0.6692450642585754, "eval_rewards/accuracies": 0.6184999942779541, "eval_rewards/chosen": -0.2830057144165039, "eval_rewards/margins": 0.05933738872408867, "eval_rewards/rejected": -0.3423430919647217, "eval_runtime": 196.8698, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.079, "step": 670 }, { "epoch": 0.09, "learning_rate": 4.444444444444444e-06, "logits/chosen": -3.0348241329193115, "logits/rejected": -3.0564351081848145, "logps/chosen": -363.07989501953125, "logps/rejected": -341.8347473144531, "loss": 0.6693, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.26485368609428406, "rewards/margins": 0.05861664563417435, "rewards/rejected": -0.3234703540802002, "step": 680 }, { "epoch": 0.09, "eval_logits/chosen": -2.7053518295288086, "eval_logits/rejected": -2.7100462913513184, "eval_logps/chosen": -362.4294128417969, "eval_logps/rejected": -336.986083984375, "eval_loss": 0.6684760451316833, "eval_rewards/accuracies": 0.6144999861717224, "eval_rewards/chosen": -0.2967996597290039, "eval_rewards/margins": 0.06196107342839241, "eval_rewards/rejected": -0.3587607443332672, "eval_runtime": 196.8194, "eval_samples_per_second": 10.162, "eval_steps_per_second": 5.081, "step": 680 }, { "epoch": 0.09, "learning_rate": 4.509803921568628e-06, "logits/chosen": -3.026939630508423, "logits/rejected": -2.987724781036377, "logps/chosen": -395.9893493652344, "logps/rejected": -351.2083740234375, "loss": 0.6707, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2795209288597107, "rewards/margins": 0.05711476877331734, "rewards/rejected": -0.3366357386112213, "step": 690 }, { "epoch": 0.09, "eval_logits/chosen": -2.7039105892181396, "eval_logits/rejected": -2.708899974822998, "eval_logps/chosen": -363.3128967285156, "eval_logps/rejected": -338.14306640625, "eval_loss": 0.6675823926925659, "eval_rewards/accuracies": 0.6150000095367432, "eval_rewards/chosen": -0.3056354224681854, "eval_rewards/margins": 0.06469501554965973, "eval_rewards/rejected": -0.37033045291900635, "eval_runtime": 197.1839, "eval_samples_per_second": 10.143, "eval_steps_per_second": 5.071, "step": 690 }, { "epoch": 0.09, "learning_rate": 4.5751633986928105e-06, "logits/chosen": -3.018585681915283, "logits/rejected": -3.023458480834961, "logps/chosen": -367.96673583984375, "logps/rejected": -350.847412109375, "loss": 0.6718, "rewards/accuracies": 0.5625, "rewards/chosen": -0.29729920625686646, "rewards/margins": 0.05686334893107414, "rewards/rejected": -0.3541625738143921, "step": 700 }, { "epoch": 0.09, "eval_logits/chosen": -2.7001843452453613, "eval_logits/rejected": -2.7050814628601074, "eval_logps/chosen": -362.6632080078125, "eval_logps/rejected": -337.5663146972656, "eval_loss": 0.6671297550201416, "eval_rewards/accuracies": 0.6144999861717224, "eval_rewards/chosen": -0.29913830757141113, "eval_rewards/margins": 0.06542481482028961, "eval_rewards/rejected": -0.36456310749053955, "eval_runtime": 197.0044, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 700 }, { "epoch": 0.09, "learning_rate": 4.640522875816994e-06, "logits/chosen": -3.025627851486206, "logits/rejected": -3.008409261703491, "logps/chosen": -374.4230041503906, "logps/rejected": -355.96136474609375, "loss": 0.6511, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.29435834288597107, "rewards/margins": 0.09984922409057617, "rewards/rejected": -0.39420756697654724, "step": 710 }, { "epoch": 0.09, "eval_logits/chosen": -2.6939971446990967, "eval_logits/rejected": -2.699645519256592, "eval_logps/chosen": -366.7897644042969, "eval_logps/rejected": -342.2338562011719, "eval_loss": 0.6659175157546997, "eval_rewards/accuracies": 0.6134999990463257, "eval_rewards/chosen": -0.3404030501842499, "eval_rewards/margins": 0.07083506137132645, "eval_rewards/rejected": -0.4112381339073181, "eval_runtime": 196.9928, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.076, "step": 710 }, { "epoch": 0.09, "learning_rate": 4.705882352941177e-06, "logits/chosen": -2.976383686065674, "logits/rejected": -2.9598684310913086, "logps/chosen": -433.13421630859375, "logps/rejected": -401.3202209472656, "loss": 0.6685, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.37345483899116516, "rewards/margins": 0.07345791161060333, "rewards/rejected": -0.4469127655029297, "step": 720 }, { "epoch": 0.09, "eval_logits/chosen": -2.690136194229126, "eval_logits/rejected": -2.6963469982147217, "eval_logps/chosen": -369.37225341796875, "eval_logps/rejected": -345.1917419433594, "eval_loss": 0.6651197671890259, "eval_rewards/accuracies": 0.6110000014305115, "eval_rewards/chosen": -0.36622846126556396, "eval_rewards/margins": 0.07458891719579697, "eval_rewards/rejected": -0.44081738591194153, "eval_runtime": 196.8075, "eval_samples_per_second": 10.162, "eval_steps_per_second": 5.081, "step": 720 }, { "epoch": 0.1, "learning_rate": 4.77124183006536e-06, "logits/chosen": -2.9549527168273926, "logits/rejected": -2.9584693908691406, "logps/chosen": -390.4580383300781, "logps/rejected": -374.32147216796875, "loss": 0.6702, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.3789103031158447, "rewards/margins": 0.0608968660235405, "rewards/rejected": -0.43980711698532104, "step": 730 }, { "epoch": 0.1, "eval_logits/chosen": -2.6869893074035645, "eval_logits/rejected": -2.6939523220062256, "eval_logps/chosen": -371.3726501464844, "eval_logps/rejected": -347.4822082519531, "eval_loss": 0.6645473837852478, "eval_rewards/accuracies": 0.6134999990463257, "eval_rewards/chosen": -0.38623228669166565, "eval_rewards/margins": 0.07748986035585403, "eval_rewards/rejected": -0.4637221693992615, "eval_runtime": 197.0176, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.076, "step": 730 }, { "epoch": 0.1, "learning_rate": 4.836601307189543e-06, "logits/chosen": -2.9691169261932373, "logits/rejected": -2.9327917098999023, "logps/chosen": -390.6695556640625, "logps/rejected": -331.70257568359375, "loss": 0.6723, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.34061798453330994, "rewards/margins": 0.05245697498321533, "rewards/rejected": -0.3930749297142029, "step": 740 }, { "epoch": 0.1, "eval_logits/chosen": -2.688300371170044, "eval_logits/rejected": -2.695115327835083, "eval_logps/chosen": -369.962646484375, "eval_logps/rejected": -346.0401916503906, "eval_loss": 0.6643568277359009, "eval_rewards/accuracies": 0.6144999861717224, "eval_rewards/chosen": -0.3721325099468231, "eval_rewards/margins": 0.07716938108205795, "eval_rewards/rejected": -0.44930192828178406, "eval_runtime": 196.9446, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.078, "step": 740 }, { "epoch": 0.1, "learning_rate": 4.901960784313726e-06, "logits/chosen": -2.872075319290161, "logits/rejected": -2.8731420040130615, "logps/chosen": -331.8930969238281, "logps/rejected": -321.7856140136719, "loss": 0.6484, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.34029215574264526, "rewards/margins": 0.10654574632644653, "rewards/rejected": -0.4468379020690918, "step": 750 }, { "epoch": 0.1, "eval_logits/chosen": -2.6878302097320557, "eval_logits/rejected": -2.695065975189209, "eval_logps/chosen": -372.1144104003906, "eval_logps/rejected": -348.4941711425781, "eval_loss": 0.6637778878211975, "eval_rewards/accuracies": 0.6110000014305115, "eval_rewards/chosen": -0.3936500549316406, "eval_rewards/margins": 0.0801912397146225, "eval_rewards/rejected": -0.47384127974510193, "eval_runtime": 197.1868, "eval_samples_per_second": 10.143, "eval_steps_per_second": 5.071, "step": 750 }, { "epoch": 0.1, "learning_rate": 4.967320261437909e-06, "logits/chosen": -2.959843158721924, "logits/rejected": -2.97227144241333, "logps/chosen": -369.81719970703125, "logps/rejected": -319.16412353515625, "loss": 0.6701, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.360421359539032, "rewards/margins": 0.057536423206329346, "rewards/rejected": -0.41795778274536133, "step": 760 }, { "epoch": 0.1, "eval_logits/chosen": -2.688547134399414, "eval_logits/rejected": -2.695556879043579, "eval_logps/chosen": -370.23651123046875, "eval_logps/rejected": -346.562744140625, "eval_loss": 0.6633652448654175, "eval_rewards/accuracies": 0.6159999966621399, "eval_rewards/chosen": -0.37487098574638367, "eval_rewards/margins": 0.079656220972538, "eval_rewards/rejected": -0.45452719926834106, "eval_runtime": 197.0589, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 760 }, { "epoch": 0.1, "learning_rate": 4.999993476542427e-06, "logits/chosen": -2.996096134185791, "logits/rejected": -2.9806487560272217, "logps/chosen": -382.1415100097656, "logps/rejected": -356.4634704589844, "loss": 0.6578, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.33577603101730347, "rewards/margins": 0.09010833501815796, "rewards/rejected": -0.4258843958377838, "step": 770 }, { "epoch": 0.1, "eval_logits/chosen": -2.6854639053344727, "eval_logits/rejected": -2.6923415660858154, "eval_logps/chosen": -372.1638488769531, "eval_logps/rejected": -348.6732482910156, "eval_loss": 0.663102924823761, "eval_rewards/accuracies": 0.6140000224113464, "eval_rewards/chosen": -0.3941444158554077, "eval_rewards/margins": 0.08148758113384247, "eval_rewards/rejected": -0.4756320118904114, "eval_runtime": 197.3159, "eval_samples_per_second": 10.136, "eval_steps_per_second": 5.068, "step": 770 }, { "epoch": 0.1, "learning_rate": 4.999941289086112e-06, "logits/chosen": -2.9802470207214355, "logits/rejected": -2.9475085735321045, "logps/chosen": -388.72674560546875, "logps/rejected": -349.171875, "loss": 0.6555, "rewards/accuracies": 0.625, "rewards/chosen": -0.3842027187347412, "rewards/margins": 0.10800528526306152, "rewards/rejected": -0.49220794439315796, "step": 780 }, { "epoch": 0.1, "eval_logits/chosen": -2.6853272914886475, "eval_logits/rejected": -2.6920483112335205, "eval_logps/chosen": -372.9538269042969, "eval_logps/rejected": -349.61456298828125, "eval_loss": 0.6627827882766724, "eval_rewards/accuracies": 0.6169999837875366, "eval_rewards/chosen": -0.40204355120658875, "eval_rewards/margins": 0.08300190418958664, "eval_rewards/rejected": -0.48504549264907837, "eval_runtime": 197.0362, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 780 }, { "epoch": 0.1, "learning_rate": 4.999836915262896e-06, "logits/chosen": -2.9006078243255615, "logits/rejected": -2.9233975410461426, "logps/chosen": -375.62646484375, "logps/rejected": -387.4599609375, "loss": 0.6354, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3945903480052948, "rewards/margins": 0.14294961094856262, "rewards/rejected": -0.5375399589538574, "step": 790 }, { "epoch": 0.1, "eval_logits/chosen": -2.6717944145202637, "eval_logits/rejected": -2.679598331451416, "eval_logps/chosen": -378.26531982421875, "eval_logps/rejected": -355.6182556152344, "eval_loss": 0.6618691682815552, "eval_rewards/accuracies": 0.609499990940094, "eval_rewards/chosen": -0.45515894889831543, "eval_rewards/margins": 0.08992352336645126, "eval_rewards/rejected": -0.5450823903083801, "eval_runtime": 197.0915, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 790 }, { "epoch": 0.1, "learning_rate": 4.999680357251587e-06, "logits/chosen": -2.7958996295928955, "logits/rejected": -2.850475788116455, "logps/chosen": -355.81787109375, "logps/rejected": -378.65435791015625, "loss": 0.643, "rewards/accuracies": 0.6875, "rewards/chosen": -0.45146241784095764, "rewards/margins": 0.12599804997444153, "rewards/rejected": -0.577460527420044, "step": 800 }, { "epoch": 0.1, "eval_logits/chosen": -2.656829595565796, "eval_logits/rejected": -2.6665048599243164, "eval_logps/chosen": -383.33489990234375, "eval_logps/rejected": -361.54486083984375, "eval_loss": 0.6610292196273804, "eval_rewards/accuracies": 0.609499990940094, "eval_rewards/chosen": -0.5058547854423523, "eval_rewards/margins": 0.09849373996257782, "eval_rewards/rejected": -0.6043485999107361, "eval_runtime": 197.0677, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.074, "step": 800 }, { "epoch": 0.11, "learning_rate": 4.999471618320339e-06, "logits/chosen": -2.8944575786590576, "logits/rejected": -2.9122395515441895, "logps/chosen": -402.0007019042969, "logps/rejected": -359.7335510253906, "loss": 0.6762, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.4933040142059326, "rewards/margins": 0.05890519544482231, "rewards/rejected": -0.5522092580795288, "step": 810 }, { "epoch": 0.11, "eval_logits/chosen": -2.650148868560791, "eval_logits/rejected": -2.6608121395111084, "eval_logps/chosen": -381.2932434082031, "eval_logps/rejected": -359.6947937011719, "eval_loss": 0.660219669342041, "eval_rewards/accuracies": 0.6110000014305115, "eval_rewards/chosen": -0.48543816804885864, "eval_rewards/margins": 0.10040930658578873, "eval_rewards/rejected": -0.5858475565910339, "eval_runtime": 197.1031, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.073, "step": 810 }, { "epoch": 0.11, "learning_rate": 4.999210702826586e-06, "logits/chosen": -3.0301737785339355, "logits/rejected": -3.028296709060669, "logps/chosen": -423.05059814453125, "logps/rejected": -380.2186279296875, "loss": 0.6559, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.47225722670555115, "rewards/margins": 0.10403690487146378, "rewards/rejected": -0.5762940645217896, "step": 820 }, { "epoch": 0.11, "eval_logits/chosen": -2.6431291103363037, "eval_logits/rejected": -2.6536731719970703, "eval_logps/chosen": -380.1671447753906, "eval_logps/rejected": -358.74859619140625, "eval_loss": 0.6598737835884094, "eval_rewards/accuracies": 0.6104999780654907, "eval_rewards/chosen": -0.47417721152305603, "eval_rewards/margins": 0.10220862179994583, "eval_rewards/rejected": -0.5763858556747437, "eval_runtime": 197.3715, "eval_samples_per_second": 10.133, "eval_steps_per_second": 5.067, "step": 820 }, { "epoch": 0.11, "learning_rate": 4.998897616216947e-06, "logits/chosen": -2.8779802322387695, "logits/rejected": -2.903449535369873, "logps/chosen": -321.4455261230469, "logps/rejected": -371.50054931640625, "loss": 0.638, "rewards/accuracies": 0.625, "rewards/chosen": -0.4697656035423279, "rewards/margins": 0.14891940355300903, "rewards/rejected": -0.6186850070953369, "step": 830 }, { "epoch": 0.11, "eval_logits/chosen": -2.624596118927002, "eval_logits/rejected": -2.6360020637512207, "eval_logps/chosen": -389.80560302734375, "eval_logps/rejected": -369.4499816894531, "eval_loss": 0.6608967185020447, "eval_rewards/accuracies": 0.6039999723434448, "eval_rewards/chosen": -0.5705617666244507, "eval_rewards/margins": 0.11283760517835617, "eval_rewards/rejected": -0.6833993792533875, "eval_runtime": 197.2478, "eval_samples_per_second": 10.14, "eval_steps_per_second": 5.07, "step": 830 }, { "epoch": 0.11, "learning_rate": 4.998532365027117e-06, "logits/chosen": -2.783334970474243, "logits/rejected": -2.809696674346924, "logps/chosen": -391.5068054199219, "logps/rejected": -329.7892761230469, "loss": 0.6485, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5235930681228638, "rewards/margins": 0.12364151328802109, "rewards/rejected": -0.6472345590591431, "step": 840 }, { "epoch": 0.11, "eval_logits/chosen": -2.616718292236328, "eval_logits/rejected": -2.6274757385253906, "eval_logps/chosen": -393.76824951171875, "eval_logps/rejected": -373.88800048828125, "eval_loss": 0.6622524261474609, "eval_rewards/accuracies": 0.6044999957084656, "eval_rewards/chosen": -0.6101884841918945, "eval_rewards/margins": 0.11759106814861298, "eval_rewards/rejected": -0.7277796268463135, "eval_runtime": 197.2914, "eval_samples_per_second": 10.137, "eval_steps_per_second": 5.069, "step": 840 }, { "epoch": 0.11, "learning_rate": 4.9981149568817275e-06, "logits/chosen": -2.8706066608428955, "logits/rejected": -2.874828577041626, "logps/chosen": -396.6725158691406, "logps/rejected": -420.00732421875, "loss": 0.6393, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.529072105884552, "rewards/margins": 0.15510477125644684, "rewards/rejected": -0.6841768026351929, "step": 850 }, { "epoch": 0.11, "eval_logits/chosen": -2.6060431003570557, "eval_logits/rejected": -2.616872549057007, "eval_logps/chosen": -398.9680480957031, "eval_logps/rejected": -379.9243469238281, "eval_loss": 0.664020299911499, "eval_rewards/accuracies": 0.6054999828338623, "eval_rewards/chosen": -0.6621867418289185, "eval_rewards/margins": 0.1259564757347107, "eval_rewards/rejected": -0.7881432771682739, "eval_runtime": 196.8874, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 850 }, { "epoch": 0.11, "learning_rate": 4.997645400494192e-06, "logits/chosen": -2.8616645336151123, "logits/rejected": -2.839806079864502, "logps/chosen": -367.6358947753906, "logps/rejected": -367.6234436035156, "loss": 0.6465, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.676906943321228, "rewards/margins": 0.1602444052696228, "rewards/rejected": -0.8371513485908508, "step": 860 }, { "epoch": 0.11, "eval_logits/chosen": -2.5956826210021973, "eval_logits/rejected": -2.6065311431884766, "eval_logps/chosen": -405.5986633300781, "eval_logps/rejected": -387.4784240722656, "eval_loss": 0.6669895052909851, "eval_rewards/accuracies": 0.6069999933242798, "eval_rewards/chosen": -0.7284926772117615, "eval_rewards/margins": 0.13519158959388733, "eval_rewards/rejected": -0.8636841773986816, "eval_runtime": 196.8502, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 860 }, { "epoch": 0.11, "learning_rate": 4.997123705666514e-06, "logits/chosen": -2.844677448272705, "logits/rejected": -2.8251328468322754, "logps/chosen": -411.3539123535156, "logps/rejected": -404.601806640625, "loss": 0.6604, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.6887822151184082, "rewards/margins": 0.11688725650310516, "rewards/rejected": -0.8056694865226746, "step": 870 }, { "epoch": 0.11, "eval_logits/chosen": -2.6024270057678223, "eval_logits/rejected": -2.6138432025909424, "eval_logps/chosen": -401.89031982421875, "eval_logps/rejected": -383.3900146484375, "eval_loss": 0.664444088935852, "eval_rewards/accuracies": 0.6060000061988831, "eval_rewards/chosen": -0.6914088129997253, "eval_rewards/margins": 0.13139095902442932, "eval_rewards/rejected": -0.8227998614311218, "eval_runtime": 196.9191, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 870 }, { "epoch": 0.12, "learning_rate": 4.996549883289093e-06, "logits/chosen": -2.82551646232605, "logits/rejected": -2.7892441749572754, "logps/chosen": -384.5074157714844, "logps/rejected": -408.62579345703125, "loss": 0.6875, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.8276923894882202, "rewards/margins": 0.09274474531412125, "rewards/rejected": -0.9204371571540833, "step": 880 }, { "epoch": 0.12, "eval_logits/chosen": -2.5992438793182373, "eval_logits/rejected": -2.6100361347198486, "eval_logps/chosen": -407.5960998535156, "eval_logps/rejected": -389.8180847167969, "eval_loss": 0.666872501373291, "eval_rewards/accuracies": 0.6014999747276306, "eval_rewards/chosen": -0.7484666705131531, "eval_rewards/margins": 0.13861419260501862, "eval_rewards/rejected": -0.8870808482170105, "eval_runtime": 197.1631, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 880 }, { "epoch": 0.12, "learning_rate": 4.995923945340495e-06, "logits/chosen": -2.87914776802063, "logits/rejected": -2.8681235313415527, "logps/chosen": -388.1961364746094, "logps/rejected": -399.79119873046875, "loss": 0.6721, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.7051995992660522, "rewards/margins": 0.10840250551700592, "rewards/rejected": -0.8136021494865417, "step": 890 }, { "epoch": 0.12, "eval_logits/chosen": -2.610374927520752, "eval_logits/rejected": -2.620400905609131, "eval_logps/chosen": -401.38885498046875, "eval_logps/rejected": -383.13677978515625, "eval_loss": 0.6637265682220459, "eval_rewards/accuracies": 0.6060000061988831, "eval_rewards/chosen": -0.6863947510719299, "eval_rewards/margins": 0.13387317955493927, "eval_rewards/rejected": -0.8202678561210632, "eval_runtime": 196.9446, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.078, "step": 890 }, { "epoch": 0.12, "learning_rate": 4.995245904887195e-06, "logits/chosen": -2.8773951530456543, "logits/rejected": -2.871093273162842, "logps/chosen": -376.4679870605469, "logps/rejected": -338.0958251953125, "loss": 0.7118, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.7589614391326904, "rewards/margins": 0.04700089246034622, "rewards/rejected": -0.8059623837471008, "step": 900 }, { "epoch": 0.12, "eval_logits/chosen": -2.6222400665283203, "eval_logits/rejected": -2.6316025257110596, "eval_logps/chosen": -391.1489562988281, "eval_logps/rejected": -371.9417724609375, "eval_loss": 0.6598663330078125, "eval_rewards/accuracies": 0.6019999980926514, "eval_rewards/chosen": -0.5839956998825073, "eval_rewards/margins": 0.12432169914245605, "eval_rewards/rejected": -0.7083174586296082, "eval_runtime": 197.1091, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.073, "step": 900 }, { "epoch": 0.12, "learning_rate": 4.994515776083313e-06, "logits/chosen": -2.8134074211120605, "logits/rejected": -2.856207847595215, "logps/chosen": -391.5662536621094, "logps/rejected": -442.9309997558594, "loss": 0.612, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5585755109786987, "rewards/margins": 0.2331864833831787, "rewards/rejected": -0.7917619347572327, "step": 910 }, { "epoch": 0.12, "eval_logits/chosen": -2.6121630668640137, "eval_logits/rejected": -2.621626853942871, "eval_logps/chosen": -394.4494323730469, "eval_logps/rejected": -375.6937255859375, "eval_loss": 0.6611830592155457, "eval_rewards/accuracies": 0.6100000143051147, "eval_rewards/chosen": -0.6170003414154053, "eval_rewards/margins": 0.12883655726909637, "eval_rewards/rejected": -0.7458369731903076, "eval_runtime": 197.2098, "eval_samples_per_second": 10.141, "eval_steps_per_second": 5.071, "step": 910 }, { "epoch": 0.12, "learning_rate": 4.993733574170316e-06, "logits/chosen": -2.858757972717285, "logits/rejected": -2.8651883602142334, "logps/chosen": -346.06536865234375, "logps/rejected": -341.8927917480469, "loss": 0.6676, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.5705429911613464, "rewards/margins": 0.13456781208515167, "rewards/rejected": -0.7051107883453369, "step": 920 }, { "epoch": 0.12, "eval_logits/chosen": -2.609501361846924, "eval_logits/rejected": -2.619729995727539, "eval_logps/chosen": -394.84228515625, "eval_logps/rejected": -376.29443359375, "eval_loss": 0.6611314415931702, "eval_rewards/accuracies": 0.609000027179718, "eval_rewards/chosen": -0.6209287643432617, "eval_rewards/margins": 0.1309155523777008, "eval_rewards/rejected": -0.7518444061279297, "eval_runtime": 196.9861, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 920 }, { "epoch": 0.12, "learning_rate": 4.992899315476696e-06, "logits/chosen": -2.884894371032715, "logits/rejected": -2.8854660987854004, "logps/chosen": -450.76397705078125, "logps/rejected": -413.5362243652344, "loss": 0.6577, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6512743830680847, "rewards/margins": 0.13802878558635712, "rewards/rejected": -0.789303183555603, "step": 930 }, { "epoch": 0.12, "eval_logits/chosen": -2.6049296855926514, "eval_logits/rejected": -2.614792585372925, "eval_logps/chosen": -397.35797119140625, "eval_logps/rejected": -379.2372741699219, "eval_loss": 0.6619851589202881, "eval_rewards/accuracies": 0.609000027179718, "eval_rewards/chosen": -0.6460856199264526, "eval_rewards/margins": 0.135187029838562, "eval_rewards/rejected": -0.7812727093696594, "eval_runtime": 197.0738, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 930 }, { "epoch": 0.12, "learning_rate": 4.9920130174176354e-06, "logits/chosen": -2.8599836826324463, "logits/rejected": -2.8363242149353027, "logps/chosen": -408.45501708984375, "logps/rejected": -397.5457763671875, "loss": 0.637, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.6751912832260132, "rewards/margins": 0.18209555745124817, "rewards/rejected": -0.8572869300842285, "step": 940 }, { "epoch": 0.12, "eval_logits/chosen": -2.6164982318878174, "eval_logits/rejected": -2.6263084411621094, "eval_logps/chosen": -390.9404602050781, "eval_logps/rejected": -372.2401428222656, "eval_loss": 0.659035325050354, "eval_rewards/accuracies": 0.609499990940094, "eval_rewards/chosen": -0.5819105505943298, "eval_rewards/margins": 0.1293908655643463, "eval_rewards/rejected": -0.7113014459609985, "eval_runtime": 197.105, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.073, "step": 940 }, { "epoch": 0.12, "learning_rate": 4.991074698494638e-06, "logits/chosen": -2.910370349884033, "logits/rejected": -2.889981746673584, "logps/chosen": -395.23870849609375, "logps/rejected": -352.4375915527344, "loss": 0.6663, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.5276027917861938, "rewards/margins": 0.09363868087530136, "rewards/rejected": -0.621241569519043, "step": 950 }, { "epoch": 0.12, "eval_logits/chosen": -2.623030185699463, "eval_logits/rejected": -2.6329829692840576, "eval_logps/chosen": -385.2651062011719, "eval_logps/rejected": -366.1216125488281, "eval_loss": 0.6571491956710815, "eval_rewards/accuracies": 0.6115000247955322, "eval_rewards/chosen": -0.5251567959785461, "eval_rewards/margins": 0.12495911866426468, "eval_rewards/rejected": -0.6501159071922302, "eval_runtime": 197.0134, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 950 }, { "epoch": 0.13, "learning_rate": 4.990084378295148e-06, "logits/chosen": -2.9056191444396973, "logits/rejected": -2.914172410964966, "logps/chosen": -356.25457763671875, "logps/rejected": -331.71575927734375, "loss": 0.6437, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.48699110746383667, "rewards/margins": 0.14344017207622528, "rewards/rejected": -0.6304312944412231, "step": 960 }, { "epoch": 0.13, "eval_logits/chosen": -2.6210577487945557, "eval_logits/rejected": -2.631321907043457, "eval_logps/chosen": -384.9283752441406, "eval_logps/rejected": -365.94134521484375, "eval_loss": 0.6564494967460632, "eval_rewards/accuracies": 0.6140000224113464, "eval_rewards/chosen": -0.5217894315719604, "eval_rewards/margins": 0.1265236735343933, "eval_rewards/rejected": -0.6483131051063538, "eval_runtime": 197.16, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 960 }, { "epoch": 0.13, "learning_rate": 4.989042077492135e-06, "logits/chosen": -2.8806536197662354, "logits/rejected": -2.8581955432891846, "logps/chosen": -384.424560546875, "logps/rejected": -377.8148498535156, "loss": 0.6053, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.45277565717697144, "rewards/margins": 0.22111694514751434, "rewards/rejected": -0.6738926768302917, "step": 970 }, { "epoch": 0.13, "eval_logits/chosen": -2.6111137866973877, "eval_logits/rejected": -2.62229585647583, "eval_logps/chosen": -389.8968811035156, "eval_logps/rejected": -371.7264404296875, "eval_loss": 0.6566739678382874, "eval_rewards/accuracies": 0.6179999709129333, "eval_rewards/chosen": -0.5714748501777649, "eval_rewards/margins": 0.1346893310546875, "eval_rewards/rejected": -0.7061640620231628, "eval_runtime": 197.1264, "eval_samples_per_second": 10.146, "eval_steps_per_second": 5.073, "step": 970 }, { "epoch": 0.13, "learning_rate": 4.987947817843665e-06, "logits/chosen": -2.7882161140441895, "logits/rejected": -2.828187942504883, "logps/chosen": -369.6874084472656, "logps/rejected": -356.955810546875, "loss": 0.63, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6042593717575073, "rewards/margins": 0.21899476647377014, "rewards/rejected": -0.8232541084289551, "step": 980 }, { "epoch": 0.13, "eval_logits/chosen": -2.5871171951293945, "eval_logits/rejected": -2.59865403175354, "eval_logps/chosen": -404.0250244140625, "eval_logps/rejected": -387.54180908203125, "eval_loss": 0.6614810824394226, "eval_rewards/accuracies": 0.612500011920929, "eval_rewards/chosen": -0.7127563953399658, "eval_rewards/margins": 0.15156131982803345, "eval_rewards/rejected": -0.8643176555633545, "eval_runtime": 197.0053, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 980 }, { "epoch": 0.13, "learning_rate": 4.986801622192453e-06, "logits/chosen": -2.840859889984131, "logits/rejected": -2.831991672515869, "logps/chosen": -351.64703369140625, "logps/rejected": -344.9053649902344, "loss": 0.6434, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.7253022193908691, "rewards/margins": 0.21113955974578857, "rewards/rejected": -0.9364417195320129, "step": 990 }, { "epoch": 0.13, "eval_logits/chosen": -2.580679416656494, "eval_logits/rejected": -2.5909790992736816, "eval_logps/chosen": -407.9801330566406, "eval_logps/rejected": -392.0733337402344, "eval_loss": 0.6637634634971619, "eval_rewards/accuracies": 0.6075000166893005, "eval_rewards/chosen": -0.7523072361946106, "eval_rewards/margins": 0.15732604265213013, "eval_rewards/rejected": -0.9096333384513855, "eval_runtime": 196.9793, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 990 }, { "epoch": 0.13, "learning_rate": 4.985603514465372e-06, "logits/chosen": -2.8628830909729004, "logits/rejected": -2.8967411518096924, "logps/chosen": -390.51971435546875, "logps/rejected": -410.86322021484375, "loss": 0.6192, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6669122576713562, "rewards/margins": 0.27157723903656006, "rewards/rejected": -0.938489556312561, "step": 1000 }, { "epoch": 0.13, "eval_logits/chosen": -2.580045700073242, "eval_logits/rejected": -2.589877128601074, "eval_logps/chosen": -409.75250244140625, "eval_logps/rejected": -394.31695556640625, "eval_loss": 0.6645926237106323, "eval_rewards/accuracies": 0.6079999804496765, "eval_rewards/chosen": -0.7700310945510864, "eval_rewards/margins": 0.1620384156703949, "eval_rewards/rejected": -0.9320694208145142, "eval_runtime": 196.9612, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 1000 }, { "epoch": 0.13, "learning_rate": 4.984353519672966e-06, "logits/chosen": -2.780689001083374, "logits/rejected": -2.805438280105591, "logps/chosen": -399.078857421875, "logps/rejected": -371.93798828125, "loss": 0.6942, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7688107490539551, "rewards/margins": 0.07026199996471405, "rewards/rejected": -0.8390728235244751, "step": 1010 }, { "epoch": 0.13, "eval_logits/chosen": -2.5973100662231445, "eval_logits/rejected": -2.6079728603363037, "eval_logps/chosen": -397.49072265625, "eval_logps/rejected": -380.68865966796875, "eval_loss": 0.6575655341148376, "eval_rewards/accuracies": 0.6144999861717224, "eval_rewards/chosen": -0.6474130153656006, "eval_rewards/margins": 0.14837341010570526, "eval_rewards/rejected": -0.7957863807678223, "eval_runtime": 197.0561, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 1010 }, { "epoch": 0.13, "learning_rate": 4.9830516639089226e-06, "logits/chosen": -2.8402628898620605, "logits/rejected": -2.847748279571533, "logps/chosen": -434.74786376953125, "logps/rejected": -364.552978515625, "loss": 0.6408, "rewards/accuracies": 0.625, "rewards/chosen": -0.6251915097236633, "rewards/margins": 0.16107437014579773, "rewards/rejected": -0.7862659692764282, "step": 1020 }, { "epoch": 0.13, "eval_logits/chosen": -2.5868897438049316, "eval_logits/rejected": -2.597947359085083, "eval_logps/chosen": -401.7997741699219, "eval_logps/rejected": -385.65380859375, "eval_loss": 0.6590712666511536, "eval_rewards/accuracies": 0.6150000095367432, "eval_rewards/chosen": -0.6905036568641663, "eval_rewards/margins": 0.15493395924568176, "eval_rewards/rejected": -0.8454375863075256, "eval_runtime": 196.979, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 1020 }, { "epoch": 0.13, "learning_rate": 4.9816979743495296e-06, "logits/chosen": -2.864896774291992, "logits/rejected": -2.867267608642578, "logps/chosen": -451.87939453125, "logps/rejected": -427.56402587890625, "loss": 0.6204, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7102250456809998, "rewards/margins": 0.23890939354896545, "rewards/rejected": -0.9491344690322876, "step": 1030 }, { "epoch": 0.13, "eval_logits/chosen": -2.5824921131134033, "eval_logits/rejected": -2.593012809753418, "eval_logps/chosen": -404.1101379394531, "eval_logps/rejected": -388.3902587890625, "eval_loss": 0.6607739329338074, "eval_rewards/accuracies": 0.6184999942779541, "eval_rewards/chosen": -0.7136072516441345, "eval_rewards/margins": 0.1591949313879013, "eval_rewards/rejected": -0.872802197933197, "eval_runtime": 196.8691, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.08, "step": 1030 }, { "epoch": 0.14, "learning_rate": 4.980292479253105e-06, "logits/chosen": -2.8844125270843506, "logits/rejected": -2.890221118927002, "logps/chosen": -439.6178283691406, "logps/rejected": -408.71136474609375, "loss": 0.5986, "rewards/accuracies": 0.75, "rewards/chosen": -0.6904081106185913, "rewards/margins": 0.2821322977542877, "rewards/rejected": -0.9725404977798462, "step": 1040 }, { "epoch": 0.14, "eval_logits/chosen": -2.5676214694976807, "eval_logits/rejected": -2.5784456729888916, "eval_logps/chosen": -413.231201171875, "eval_logps/rejected": -398.8504638671875, "eval_loss": 0.6670145392417908, "eval_rewards/accuracies": 0.612500011920929, "eval_rewards/chosen": -0.8048175573348999, "eval_rewards/margins": 0.17258678376674652, "eval_rewards/rejected": -0.9774044156074524, "eval_runtime": 196.6812, "eval_samples_per_second": 10.169, "eval_steps_per_second": 5.084, "step": 1040 }, { "epoch": 0.14, "learning_rate": 4.978835207959414e-06, "logits/chosen": -2.8102452754974365, "logits/rejected": -2.813763380050659, "logps/chosen": -398.92425537109375, "logps/rejected": -385.8040771484375, "loss": 0.6624, "rewards/accuracies": 0.625, "rewards/chosen": -0.7675672769546509, "rewards/margins": 0.1677415817975998, "rewards/rejected": -0.9353087544441223, "step": 1050 }, { "epoch": 0.14, "eval_logits/chosen": -2.5734024047851562, "eval_logits/rejected": -2.5844082832336426, "eval_logps/chosen": -411.4896240234375, "eval_logps/rejected": -397.12872314453125, "eval_loss": 0.6648815870285034, "eval_rewards/accuracies": 0.6150000095367432, "eval_rewards/chosen": -0.7874022126197815, "eval_rewards/margins": 0.17278487980365753, "eval_rewards/rejected": -0.9601870775222778, "eval_runtime": 197.1774, "eval_samples_per_second": 10.143, "eval_steps_per_second": 5.072, "step": 1050 }, { "epoch": 0.14, "learning_rate": 4.977326190889046e-06, "logits/chosen": -2.831808090209961, "logits/rejected": -2.7664592266082764, "logps/chosen": -405.8113708496094, "logps/rejected": -346.0255432128906, "loss": 0.6528, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.664142906665802, "rewards/margins": 0.14156608283519745, "rewards/rejected": -0.8057088851928711, "step": 1060 }, { "epoch": 0.14, "eval_logits/chosen": -2.5949344635009766, "eval_logits/rejected": -2.606123685836792, "eval_logps/chosen": -399.26690673828125, "eval_logps/rejected": -383.4966735839844, "eval_loss": 0.6575686931610107, "eval_rewards/accuracies": 0.6169999837875366, "eval_rewards/chosen": -0.6651748418807983, "eval_rewards/margins": 0.1586921513080597, "eval_rewards/rejected": -0.8238670825958252, "eval_runtime": 196.9062, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 1060 }, { "epoch": 0.14, "learning_rate": 4.975765459542788e-06, "logits/chosen": -2.8009772300720215, "logits/rejected": -2.8274002075195312, "logps/chosen": -376.3597106933594, "logps/rejected": -378.44921875, "loss": 0.6332, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.5817060470581055, "rewards/margins": 0.20408394932746887, "rewards/rejected": -0.7857899069786072, "step": 1070 }, { "epoch": 0.14, "eval_logits/chosen": -2.5985264778137207, "eval_logits/rejected": -2.610111951828003, "eval_logps/chosen": -396.3663635253906, "eval_logps/rejected": -380.4352722167969, "eval_loss": 0.656349778175354, "eval_rewards/accuracies": 0.6175000071525574, "eval_rewards/chosen": -0.6361696124076843, "eval_rewards/margins": 0.15708313882350922, "eval_rewards/rejected": -0.7932528257369995, "eval_runtime": 196.8426, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 1070 }, { "epoch": 0.14, "learning_rate": 4.9741530465009665e-06, "logits/chosen": -2.767240285873413, "logits/rejected": -2.743711471557617, "logps/chosen": -362.4321594238281, "logps/rejected": -348.2496032714844, "loss": 0.6364, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.556038498878479, "rewards/margins": 0.1609477698802948, "rewards/rejected": -0.7169862985610962, "step": 1080 }, { "epoch": 0.14, "eval_logits/chosen": -2.5956056118011475, "eval_logits/rejected": -2.607423782348633, "eval_logps/chosen": -397.013916015625, "eval_logps/rejected": -381.3038635253906, "eval_loss": 0.6564236879348755, "eval_rewards/accuracies": 0.6140000224113464, "eval_rewards/chosen": -0.6426447629928589, "eval_rewards/margins": 0.15929388999938965, "eval_rewards/rejected": -0.8019387125968933, "eval_runtime": 196.8787, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.079, "step": 1080 }, { "epoch": 0.14, "learning_rate": 4.972488985422763e-06, "logits/chosen": -2.787623882293701, "logits/rejected": -2.7924771308898926, "logps/chosen": -364.26190185546875, "logps/rejected": -345.1329650878906, "loss": 0.6063, "rewards/accuracies": 0.625, "rewards/chosen": -0.5557677745819092, "rewards/margins": 0.29480546712875366, "rewards/rejected": -0.8505731821060181, "step": 1090 }, { "epoch": 0.14, "eval_logits/chosen": -2.58791184425354, "eval_logits/rejected": -2.5994956493377686, "eval_logps/chosen": -401.9653015136719, "eval_logps/rejected": -387.0663757324219, "eval_loss": 0.6588745713233948, "eval_rewards/accuracies": 0.6140000224113464, "eval_rewards/chosen": -0.6921590566635132, "eval_rewards/margins": 0.16740475594997406, "eval_rewards/rejected": -0.8595638275146484, "eval_runtime": 196.8539, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 1090 }, { "epoch": 0.14, "learning_rate": 4.970773311045514e-06, "logits/chosen": -2.7719860076904297, "logits/rejected": -2.7706387042999268, "logps/chosen": -385.5480651855469, "logps/rejected": -369.0314636230469, "loss": 0.6684, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.6518079042434692, "rewards/margins": 0.12277636677026749, "rewards/rejected": -0.7745842337608337, "step": 1100 }, { "epoch": 0.14, "eval_logits/chosen": -2.595028877258301, "eval_logits/rejected": -2.6067097187042236, "eval_logps/chosen": -397.0016784667969, "eval_logps/rejected": -381.60601806640625, "eval_loss": 0.6570342183113098, "eval_rewards/accuracies": 0.6159999966621399, "eval_rewards/chosen": -0.6425228714942932, "eval_rewards/margins": 0.1624370813369751, "eval_rewards/rejected": -0.8049599528312683, "eval_runtime": 197.1886, "eval_samples_per_second": 10.143, "eval_steps_per_second": 5.071, "step": 1100 }, { "epoch": 0.15, "learning_rate": 4.969006059183984e-06, "logits/chosen": -2.790360689163208, "logits/rejected": -2.7791943550109863, "logps/chosen": -398.4950866699219, "logps/rejected": -373.24981689453125, "loss": 0.6948, "rewards/accuracies": 0.625, "rewards/chosen": -0.618482768535614, "rewards/margins": 0.09040616452693939, "rewards/rejected": -0.7088888883590698, "step": 1110 }, { "epoch": 0.15, "eval_logits/chosen": -2.6154470443725586, "eval_logits/rejected": -2.6268742084503174, "eval_logps/chosen": -386.2301940917969, "eval_logps/rejected": -369.3778381347656, "eval_loss": 0.6534083485603333, "eval_rewards/accuracies": 0.6240000128746033, "eval_rewards/chosen": -0.5348080396652222, "eval_rewards/margins": 0.1478704810142517, "eval_rewards/rejected": -0.6826784610748291, "eval_runtime": 197.1538, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 1110 }, { "epoch": 0.15, "learning_rate": 4.967187266729623e-06, "logits/chosen": -2.917677164077759, "logits/rejected": -2.8968892097473145, "logps/chosen": -393.16241455078125, "logps/rejected": -371.6067199707031, "loss": 0.683, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.5983080863952637, "rewards/margins": 0.09545192122459412, "rewards/rejected": -0.6937600374221802, "step": 1120 }, { "epoch": 0.15, "eval_logits/chosen": -2.6260616779327393, "eval_logits/rejected": -2.6370902061462402, "eval_logps/chosen": -381.9963073730469, "eval_logps/rejected": -364.393310546875, "eval_loss": 0.652645468711853, "eval_rewards/accuracies": 0.6255000233650208, "eval_rewards/chosen": -0.49246877431869507, "eval_rewards/margins": 0.1403646171092987, "eval_rewards/rejected": -0.6328333616256714, "eval_runtime": 196.8079, "eval_samples_per_second": 10.162, "eval_steps_per_second": 5.081, "step": 1120 }, { "epoch": 0.15, "learning_rate": 4.965316971649791e-06, "logits/chosen": -2.8983585834503174, "logits/rejected": -2.887768030166626, "logps/chosen": -404.21990966796875, "logps/rejected": -374.83392333984375, "loss": 0.5879, "rewards/accuracies": 0.75, "rewards/chosen": -0.43791407346725464, "rewards/margins": 0.28374427556991577, "rewards/rejected": -0.7216584086418152, "step": 1130 }, { "epoch": 0.15, "eval_logits/chosen": -2.6281559467315674, "eval_logits/rejected": -2.639291763305664, "eval_logps/chosen": -381.9638366699219, "eval_logps/rejected": -364.2988586425781, "eval_loss": 0.6523311138153076, "eval_rewards/accuracies": 0.6209999918937683, "eval_rewards/chosen": -0.4921444058418274, "eval_rewards/margins": 0.13974425196647644, "eval_rewards/rejected": -0.6318886280059814, "eval_runtime": 196.9943, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.076, "step": 1130 }, { "epoch": 0.15, "learning_rate": 4.963395212986964e-06, "logits/chosen": -2.8828487396240234, "logits/rejected": -2.862426280975342, "logps/chosen": -347.30792236328125, "logps/rejected": -316.6706237792969, "loss": 0.6422, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.4925723075866699, "rewards/margins": 0.1589849591255188, "rewards/rejected": -0.6515573263168335, "step": 1140 }, { "epoch": 0.15, "eval_logits/chosen": -2.628563404083252, "eval_logits/rejected": -2.6401455402374268, "eval_logps/chosen": -382.7380676269531, "eval_logps/rejected": -365.3920593261719, "eval_loss": 0.6521285176277161, "eval_rewards/accuracies": 0.6234999895095825, "eval_rewards/chosen": -0.49988648295402527, "eval_rewards/margins": 0.1429338902235031, "eval_rewards/rejected": -0.6428203582763672, "eval_runtime": 196.8857, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 1140 }, { "epoch": 0.15, "learning_rate": 4.9614220308579285e-06, "logits/chosen": -2.8444035053253174, "logits/rejected": -2.877077341079712, "logps/chosen": -386.1272888183594, "logps/rejected": -391.4237976074219, "loss": 0.6534, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5051605105400085, "rewards/margins": 0.12393464893102646, "rewards/rejected": -0.629095196723938, "step": 1150 }, { "epoch": 0.15, "eval_logits/chosen": -2.623694896697998, "eval_logits/rejected": -2.6355655193328857, "eval_logps/chosen": -384.07421875, "eval_logps/rejected": -367.05096435546875, "eval_loss": 0.6519166231155396, "eval_rewards/accuracies": 0.6215000152587891, "eval_rewards/chosen": -0.5132482051849365, "eval_rewards/margins": 0.14616157114505768, "eval_rewards/rejected": -0.6594097018241882, "eval_runtime": 197.0766, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 1150 }, { "epoch": 0.15, "learning_rate": 4.9593974664529325e-06, "logits/chosen": -2.8335769176483154, "logits/rejected": -2.8060200214385986, "logps/chosen": -384.097412109375, "logps/rejected": -385.43145751953125, "loss": 0.642, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5143892168998718, "rewards/margins": 0.1666194498538971, "rewards/rejected": -0.6810086369514465, "step": 1160 }, { "epoch": 0.15, "eval_logits/chosen": -2.618823528289795, "eval_logits/rejected": -2.6312339305877686, "eval_logps/chosen": -386.3024597167969, "eval_logps/rejected": -369.7018737792969, "eval_loss": 0.6519332528114319, "eval_rewards/accuracies": 0.6240000128746033, "eval_rewards/chosen": -0.535530686378479, "eval_rewards/margins": 0.1503879874944687, "eval_rewards/rejected": -0.6859186887741089, "eval_runtime": 196.9, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 1160 }, { "epoch": 0.15, "learning_rate": 4.957321562034833e-06, "logits/chosen": -2.9319796562194824, "logits/rejected": -2.925686836242676, "logps/chosen": -401.462890625, "logps/rejected": -396.30706787109375, "loss": 0.6138, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.5492364168167114, "rewards/margins": 0.2358781397342682, "rewards/rejected": -0.7851146459579468, "step": 1170 }, { "epoch": 0.15, "eval_logits/chosen": -2.6126275062561035, "eval_logits/rejected": -2.625770092010498, "eval_logps/chosen": -390.0626220703125, "eval_logps/rejected": -374.27813720703125, "eval_loss": 0.6525918245315552, "eval_rewards/accuracies": 0.6230000257492065, "eval_rewards/chosen": -0.5731325745582581, "eval_rewards/margins": 0.15854857861995697, "eval_rewards/rejected": -0.7316811680793762, "eval_runtime": 196.7922, "eval_samples_per_second": 10.163, "eval_steps_per_second": 5.082, "step": 1170 }, { "epoch": 0.15, "learning_rate": 4.955194360938214e-06, "logits/chosen": -2.9208590984344482, "logits/rejected": -2.9480223655700684, "logps/chosen": -372.567626953125, "logps/rejected": -351.185791015625, "loss": 0.6603, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5812569260597229, "rewards/margins": 0.12481508404016495, "rewards/rejected": -0.7060720324516296, "step": 1180 }, { "epoch": 0.15, "eval_logits/chosen": -2.609142780303955, "eval_logits/rejected": -2.6229264736175537, "eval_logps/chosen": -395.072021484375, "eval_logps/rejected": -380.0800476074219, "eval_loss": 0.6529130935668945, "eval_rewards/accuracies": 0.6184999942779541, "eval_rewards/chosen": -0.6232264041900635, "eval_rewards/margins": 0.16647417843341827, "eval_rewards/rejected": -0.7897005081176758, "eval_runtime": 197.0058, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 1180 }, { "epoch": 0.16, "learning_rate": 4.9530159075684735e-06, "logits/chosen": -2.8826727867126465, "logits/rejected": -2.865142345428467, "logps/chosen": -355.7762145996094, "logps/rejected": -448.8487854003906, "loss": 0.6446, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.6446607112884521, "rewards/margins": 0.2116236686706543, "rewards/rejected": -0.8562844395637512, "step": 1190 }, { "epoch": 0.16, "eval_logits/chosen": -2.6025123596191406, "eval_logits/rejected": -2.6169042587280273, "eval_logps/chosen": -397.5279235839844, "eval_logps/rejected": -382.97857666015625, "eval_loss": 0.6540065407752991, "eval_rewards/accuracies": 0.6159999966621399, "eval_rewards/chosen": -0.6477850675582886, "eval_rewards/margins": 0.17090027034282684, "eval_rewards/rejected": -0.8186854124069214, "eval_runtime": 197.0343, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 1190 }, { "epoch": 0.16, "learning_rate": 4.950786247400908e-06, "logits/chosen": -2.848290205001831, "logits/rejected": -2.8513758182525635, "logps/chosen": -365.59149169921875, "logps/rejected": -357.754150390625, "loss": 0.6647, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.6885515451431274, "rewards/margins": 0.1252683699131012, "rewards/rejected": -0.8138198852539062, "step": 1200 }, { "epoch": 0.16, "eval_logits/chosen": -2.599571466445923, "eval_logits/rejected": -2.6143128871917725, "eval_logps/chosen": -398.3748474121094, "eval_logps/rejected": -383.98876953125, "eval_loss": 0.6546086668968201, "eval_rewards/accuracies": 0.6184999942779541, "eval_rewards/chosen": -0.6562545299530029, "eval_rewards/margins": 0.17253316938877106, "eval_rewards/rejected": -0.8287877440452576, "eval_runtime": 197.0359, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 1200 }, { "epoch": 0.16, "learning_rate": 4.948505426979756e-06, "logits/chosen": -2.82503342628479, "logits/rejected": -2.8127999305725098, "logps/chosen": -384.06732177734375, "logps/rejected": -385.8426513671875, "loss": 0.6214, "rewards/accuracies": 0.625, "rewards/chosen": -0.6643961071968079, "rewards/margins": 0.2610850930213928, "rewards/rejected": -0.9254812002182007, "step": 1210 }, { "epoch": 0.16, "eval_logits/chosen": -2.6038050651550293, "eval_logits/rejected": -2.618673801422119, "eval_logps/chosen": -400.09637451171875, "eval_logps/rejected": -385.89654541015625, "eval_loss": 0.653429388999939, "eval_rewards/accuracies": 0.6225000023841858, "eval_rewards/chosen": -0.6734698414802551, "eval_rewards/margins": 0.17439521849155426, "eval_rewards/rejected": -0.8478650450706482, "eval_runtime": 196.8932, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 1210 }, { "epoch": 0.16, "learning_rate": 4.946173493917228e-06, "logits/chosen": -2.826169490814209, "logits/rejected": -2.832860231399536, "logps/chosen": -395.909423828125, "logps/rejected": -354.52105712890625, "loss": 0.7606, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.7976791262626648, "rewards/margins": -0.05043324828147888, "rewards/rejected": -0.7472458481788635, "step": 1220 }, { "epoch": 0.16, "eval_logits/chosen": -2.6216962337493896, "eval_logits/rejected": -2.635549545288086, "eval_logps/chosen": -393.2814636230469, "eval_logps/rejected": -377.7646484375, "eval_loss": 0.650115430355072, "eval_rewards/accuracies": 0.6234999895095825, "eval_rewards/chosen": -0.6053206324577332, "eval_rewards/margins": 0.16122600436210632, "eval_rewards/rejected": -0.7665466070175171, "eval_runtime": 197.0589, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 1220 }, { "epoch": 0.16, "learning_rate": 4.943790496892513e-06, "logits/chosen": -2.900090456008911, "logits/rejected": -2.9031574726104736, "logps/chosen": -381.09210205078125, "logps/rejected": -344.6459655761719, "loss": 0.64, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5675815939903259, "rewards/margins": 0.18301823735237122, "rewards/rejected": -0.7505998015403748, "step": 1230 }, { "epoch": 0.16, "eval_logits/chosen": -2.635103464126587, "eval_logits/rejected": -2.648451566696167, "eval_logps/chosen": -388.4599304199219, "eval_logps/rejected": -372.07000732421875, "eval_loss": 0.648918867111206, "eval_rewards/accuracies": 0.6234999895095825, "eval_rewards/chosen": -0.5571054816246033, "eval_rewards/margins": 0.15249404311180115, "eval_rewards/rejected": -0.709599494934082, "eval_runtime": 196.9092, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.078, "step": 1230 }, { "epoch": 0.16, "learning_rate": 4.941356485650762e-06, "logits/chosen": -2.9525580406188965, "logits/rejected": -2.941685676574707, "logps/chosen": -429.52752685546875, "logps/rejected": -408.3736877441406, "loss": 0.6503, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5319823026657104, "rewards/margins": 0.152207612991333, "rewards/rejected": -0.6841899752616882, "step": 1240 }, { "epoch": 0.16, "eval_logits/chosen": -2.641964912414551, "eval_logits/rejected": -2.6544265747070312, "eval_logps/chosen": -383.81805419921875, "eval_logps/rejected": -366.7745666503906, "eval_loss": 0.6485514044761658, "eval_rewards/accuracies": 0.628000020980835, "eval_rewards/chosen": -0.5106862187385559, "eval_rewards/margins": 0.14595915377140045, "eval_rewards/rejected": -0.6566452980041504, "eval_runtime": 197.009, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 1240 }, { "epoch": 0.16, "learning_rate": 4.93887151100205e-06, "logits/chosen": -2.8823115825653076, "logits/rejected": -2.9025607109069824, "logps/chosen": -431.13311767578125, "logps/rejected": -402.4587707519531, "loss": 0.6625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.45433109998703003, "rewards/margins": 0.09613112360239029, "rewards/rejected": -0.5504623055458069, "step": 1250 }, { "epoch": 0.16, "eval_logits/chosen": -2.650139570236206, "eval_logits/rejected": -2.6617093086242676, "eval_logps/chosen": -379.8656311035156, "eval_logps/rejected": -362.19818115234375, "eval_loss": 0.6486051082611084, "eval_rewards/accuracies": 0.6290000081062317, "eval_rewards/chosen": -0.47116225957870483, "eval_rewards/margins": 0.139719620347023, "eval_rewards/rejected": -0.6108819246292114, "eval_runtime": 197.0634, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 1250 }, { "epoch": 0.16, "learning_rate": 4.936335624820313e-06, "logits/chosen": -2.9498510360717773, "logits/rejected": -2.936628818511963, "logps/chosen": -369.2878112792969, "logps/rejected": -331.2504577636719, "loss": 0.6365, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.41221198439598083, "rewards/margins": 0.15033474564552307, "rewards/rejected": -0.5625467896461487, "step": 1260 }, { "epoch": 0.16, "eval_logits/chosen": -2.648029088973999, "eval_logits/rejected": -2.659898281097412, "eval_logps/chosen": -379.5408630371094, "eval_logps/rejected": -361.9754638671875, "eval_loss": 0.6484161615371704, "eval_rewards/accuracies": 0.6299999952316284, "eval_rewards/chosen": -0.4679144322872162, "eval_rewards/margins": 0.1407402604818344, "eval_rewards/rejected": -0.6086547374725342, "eval_runtime": 196.9078, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 1260 }, { "epoch": 0.17, "learning_rate": 4.933748880042271e-06, "logits/chosen": -2.9828124046325684, "logits/rejected": -2.9394354820251465, "logps/chosen": -375.38494873046875, "logps/rejected": -345.7095642089844, "loss": 0.6314, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.43731123208999634, "rewards/margins": 0.17238859832286835, "rewards/rejected": -0.6096998453140259, "step": 1270 }, { "epoch": 0.17, "eval_logits/chosen": -2.6389575004577637, "eval_logits/rejected": -2.6514506340026855, "eval_logps/chosen": -384.9039001464844, "eval_logps/rejected": -368.42083740234375, "eval_loss": 0.6474685072898865, "eval_rewards/accuracies": 0.6274999976158142, "eval_rewards/chosen": -0.5215447545051575, "eval_rewards/margins": 0.1515636146068573, "eval_rewards/rejected": -0.6731082797050476, "eval_runtime": 196.8359, "eval_samples_per_second": 10.161, "eval_steps_per_second": 5.08, "step": 1270 }, { "epoch": 0.17, "learning_rate": 4.931111330666317e-06, "logits/chosen": -2.8784518241882324, "logits/rejected": -2.8599307537078857, "logps/chosen": -365.7098693847656, "logps/rejected": -329.1894836425781, "loss": 0.6429, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5193601846694946, "rewards/margins": 0.14618203043937683, "rewards/rejected": -0.6655422449111938, "step": 1280 }, { "epoch": 0.17, "eval_logits/chosen": -2.629905939102173, "eval_logits/rejected": -2.643465042114258, "eval_logps/chosen": -391.0508728027344, "eval_logps/rejected": -375.53936767578125, "eval_loss": 0.6468499898910522, "eval_rewards/accuracies": 0.6265000104904175, "eval_rewards/chosen": -0.5830146670341492, "eval_rewards/margins": 0.1612788736820221, "eval_rewards/rejected": -0.7442935109138489, "eval_runtime": 197.1153, "eval_samples_per_second": 10.146, "eval_steps_per_second": 5.073, "step": 1280 }, { "epoch": 0.17, "learning_rate": 4.9284230317513906e-06, "logits/chosen": -2.9220080375671387, "logits/rejected": -2.8997421264648438, "logps/chosen": -420.4480895996094, "logps/rejected": -379.02520751953125, "loss": 0.6351, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5989479422569275, "rewards/margins": 0.19692249596118927, "rewards/rejected": -0.7958704829216003, "step": 1290 }, { "epoch": 0.17, "eval_logits/chosen": -2.6292710304260254, "eval_logits/rejected": -2.643364667892456, "eval_logps/chosen": -395.0453186035156, "eval_logps/rejected": -380.0833740234375, "eval_loss": 0.6463254690170288, "eval_rewards/accuracies": 0.6255000233650208, "eval_rewards/chosen": -0.6229589581489563, "eval_rewards/margins": 0.16677448153495789, "eval_rewards/rejected": -0.7897334694862366, "eval_runtime": 196.93, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 1290 }, { "epoch": 0.17, "learning_rate": 4.9256840394158325e-06, "logits/chosen": -2.8061976432800293, "logits/rejected": -2.8045198917388916, "logps/chosen": -407.2315368652344, "logps/rejected": -452.9224548339844, "loss": 0.6147, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.6434057950973511, "rewards/margins": 0.2461806833744049, "rewards/rejected": -0.8895864486694336, "step": 1300 }, { "epoch": 0.17, "eval_logits/chosen": -2.6303420066833496, "eval_logits/rejected": -2.6449639797210693, "eval_logps/chosen": -402.0264892578125, "eval_logps/rejected": -387.9538269042969, "eval_loss": 0.6466883420944214, "eval_rewards/accuracies": 0.621999979019165, "eval_rewards/chosen": -0.6927708387374878, "eval_rewards/margins": 0.17566701769828796, "eval_rewards/rejected": -0.8684378862380981, "eval_runtime": 197.2101, "eval_samples_per_second": 10.141, "eval_steps_per_second": 5.071, "step": 1300 }, { "epoch": 0.17, "learning_rate": 4.922894410836207e-06, "logits/chosen": -2.8735668659210205, "logits/rejected": -2.8372910022735596, "logps/chosen": -431.63714599609375, "logps/rejected": -371.83966064453125, "loss": 0.6809, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7498286962509155, "rewards/margins": 0.13587155938148499, "rewards/rejected": -0.8857002258300781, "step": 1310 }, { "epoch": 0.17, "eval_logits/chosen": -2.6348013877868652, "eval_logits/rejected": -2.648871421813965, "eval_logps/chosen": -406.22467041015625, "eval_logps/rejected": -392.4579162597656, "eval_loss": 0.6477887034416199, "eval_rewards/accuracies": 0.6215000152587891, "eval_rewards/chosen": -0.7347524166107178, "eval_rewards/margins": 0.17872664332389832, "eval_rewards/rejected": -0.9134791493415833, "eval_runtime": 196.9305, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 1310 }, { "epoch": 0.17, "learning_rate": 4.920054204246116e-06, "logits/chosen": -2.89911150932312, "logits/rejected": -2.8787920475006104, "logps/chosen": -411.595947265625, "logps/rejected": -365.5648498535156, "loss": 0.6469, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.6206297874450684, "rewards/margins": 0.1485154777765274, "rewards/rejected": -0.7691451907157898, "step": 1320 }, { "epoch": 0.17, "eval_logits/chosen": -2.6406266689300537, "eval_logits/rejected": -2.6541240215301514, "eval_logps/chosen": -400.6297912597656, "eval_logps/rejected": -386.1690979003906, "eval_loss": 0.6465025544166565, "eval_rewards/accuracies": 0.6200000047683716, "eval_rewards/chosen": -0.6788040399551392, "eval_rewards/margins": 0.17178669571876526, "eval_rewards/rejected": -0.8505907654762268, "eval_runtime": 197.1066, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.073, "step": 1320 }, { "epoch": 0.17, "learning_rate": 4.9171634789349744e-06, "logits/chosen": -2.873453140258789, "logits/rejected": -2.8838162422180176, "logps/chosen": -391.2914123535156, "logps/rejected": -407.5999755859375, "loss": 0.5969, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6366595029830933, "rewards/margins": 0.26933524012565613, "rewards/rejected": -0.9059947729110718, "step": 1330 }, { "epoch": 0.17, "eval_logits/chosen": -2.634784460067749, "eval_logits/rejected": -2.6493115425109863, "eval_logps/chosen": -399.88433837890625, "eval_logps/rejected": -385.7465515136719, "eval_loss": 0.6460168361663818, "eval_rewards/accuracies": 0.6215000152587891, "eval_rewards/chosen": -0.6713496446609497, "eval_rewards/margins": 0.17501556873321533, "eval_rewards/rejected": -0.846365213394165, "eval_runtime": 197.0915, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 1330 }, { "epoch": 0.18, "learning_rate": 4.914222295246782e-06, "logits/chosen": -2.8562376499176025, "logits/rejected": -2.856698513031006, "logps/chosen": -392.6881408691406, "logps/rejected": -384.72723388671875, "loss": 0.6755, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.6853694915771484, "rewards/margins": 0.09361520409584045, "rewards/rejected": -0.7789847254753113, "step": 1340 }, { "epoch": 0.18, "eval_logits/chosen": -2.6282548904418945, "eval_logits/rejected": -2.6436192989349365, "eval_logps/chosen": -400.68450927734375, "eval_logps/rejected": -386.9583740234375, "eval_loss": 0.6460389494895935, "eval_rewards/accuracies": 0.6209999918937683, "eval_rewards/chosen": -0.6793510317802429, "eval_rewards/margins": 0.1791324019432068, "eval_rewards/rejected": -0.8584833741188049, "eval_runtime": 197.0616, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 1340 }, { "epoch": 0.18, "learning_rate": 4.911230714578858e-06, "logits/chosen": -2.837684154510498, "logits/rejected": -2.8746697902679443, "logps/chosen": -336.82830810546875, "logps/rejected": -383.8268127441406, "loss": 0.6043, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.6028086543083191, "rewards/margins": 0.26204943656921387, "rewards/rejected": -0.8648580312728882, "step": 1350 }, { "epoch": 0.18, "eval_logits/chosen": -2.619495391845703, "eval_logits/rejected": -2.6352591514587402, "eval_logps/chosen": -400.1864013671875, "eval_logps/rejected": -386.6622009277344, "eval_loss": 0.6463934183120728, "eval_rewards/accuracies": 0.6265000104904175, "eval_rewards/chosen": -0.674369752407074, "eval_rewards/margins": 0.18115192651748657, "eval_rewards/rejected": -0.8555216789245605, "eval_runtime": 196.9235, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 1350 }, { "epoch": 0.18, "learning_rate": 4.908188799380558e-06, "logits/chosen": -2.8478968143463135, "logits/rejected": -2.8693909645080566, "logps/chosen": -372.5030517578125, "logps/rejected": -350.12347412109375, "loss": 0.6213, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.6362836360931396, "rewards/margins": 0.21307387948036194, "rewards/rejected": -0.8493574857711792, "step": 1360 }, { "epoch": 0.18, "eval_logits/chosen": -2.6073155403137207, "eval_logits/rejected": -2.623591899871826, "eval_logps/chosen": -405.56060791015625, "eval_logps/rejected": -392.87646484375, "eval_loss": 0.6476759314537048, "eval_rewards/accuracies": 0.6274999976158142, "eval_rewards/chosen": -0.7281119227409363, "eval_rewards/margins": 0.18955254554748535, "eval_rewards/rejected": -0.9176644682884216, "eval_runtime": 196.7678, "eval_samples_per_second": 10.164, "eval_steps_per_second": 5.082, "step": 1360 }, { "epoch": 0.18, "learning_rate": 4.905096613151975e-06, "logits/chosen": -2.7704315185546875, "logits/rejected": -2.7330398559570312, "logps/chosen": -442.86669921875, "logps/rejected": -425.8495178222656, "loss": 0.6907, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.786264181137085, "rewards/margins": 0.07904358208179474, "rewards/rejected": -0.8653076887130737, "step": 1370 }, { "epoch": 0.18, "eval_logits/chosen": -2.6075170040130615, "eval_logits/rejected": -2.623138904571533, "eval_logps/chosen": -406.0066223144531, "eval_logps/rejected": -393.4615478515625, "eval_loss": 0.6468802690505981, "eval_rewards/accuracies": 0.625, "eval_rewards/chosen": -0.7325721979141235, "eval_rewards/margins": 0.19094309210777283, "eval_rewards/rejected": -0.923515260219574, "eval_runtime": 196.7864, "eval_samples_per_second": 10.163, "eval_steps_per_second": 5.082, "step": 1370 }, { "epoch": 0.18, "learning_rate": 4.90195422044261e-06, "logits/chosen": -2.8514914512634277, "logits/rejected": -2.8642072677612305, "logps/chosen": -419.1561584472656, "logps/rejected": -406.09771728515625, "loss": 0.6027, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6098551750183105, "rewards/margins": 0.275759756565094, "rewards/rejected": -0.8856149911880493, "step": 1380 }, { "epoch": 0.18, "eval_logits/chosen": -2.6010429859161377, "eval_logits/rejected": -2.616837739944458, "eval_logps/chosen": -407.6045227050781, "eval_logps/rejected": -395.3316650390625, "eval_loss": 0.6467740535736084, "eval_rewards/accuracies": 0.6215000152587891, "eval_rewards/chosen": -0.7485515475273132, "eval_rewards/margins": 0.19366492331027985, "eval_rewards/rejected": -0.9422163963317871, "eval_runtime": 197.4331, "eval_samples_per_second": 10.13, "eval_steps_per_second": 5.065, "step": 1380 }, { "epoch": 0.18, "learning_rate": 4.898761686850028e-06, "logits/chosen": -2.7812657356262207, "logits/rejected": -2.746971368789673, "logps/chosen": -409.679443359375, "logps/rejected": -418.7041015625, "loss": 0.6505, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.8384740948677063, "rewards/margins": 0.23909902572631836, "rewards/rejected": -1.0775730609893799, "step": 1390 }, { "epoch": 0.18, "eval_logits/chosen": -2.6073296070098877, "eval_logits/rejected": -2.6223056316375732, "eval_logps/chosen": -404.7939758300781, "eval_logps/rejected": -392.07623291015625, "eval_loss": 0.6456750631332397, "eval_rewards/accuracies": 0.625, "eval_rewards/chosen": -0.7204453349113464, "eval_rewards/margins": 0.18921701610088348, "eval_rewards/rejected": -0.9096623659133911, "eval_runtime": 196.8403, "eval_samples_per_second": 10.161, "eval_steps_per_second": 5.08, "step": 1390 }, { "epoch": 0.18, "learning_rate": 4.895519079018485e-06, "logits/chosen": -2.752323627471924, "logits/rejected": -2.7307071685791016, "logps/chosen": -385.1970520019531, "logps/rejected": -366.06488037109375, "loss": 0.6131, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.6806402206420898, "rewards/margins": 0.29367339611053467, "rewards/rejected": -0.9743136167526245, "step": 1400 }, { "epoch": 0.18, "eval_logits/chosen": -2.611858367919922, "eval_logits/rejected": -2.6268720626831055, "eval_logps/chosen": -402.51055908203125, "eval_logps/rejected": -389.5133056640625, "eval_loss": 0.6447837948799133, "eval_rewards/accuracies": 0.6265000104904175, "eval_rewards/chosen": -0.6976117491722107, "eval_rewards/margins": 0.18642136454582214, "eval_rewards/rejected": -0.8840330839157104, "eval_runtime": 196.9091, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.078, "step": 1400 }, { "epoch": 0.18, "learning_rate": 4.89222646463754e-06, "logits/chosen": -2.8868727684020996, "logits/rejected": -2.8568384647369385, "logps/chosen": -393.14556884765625, "logps/rejected": -396.2594299316406, "loss": 0.6537, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.7115556001663208, "rewards/margins": 0.20605134963989258, "rewards/rejected": -0.9176069498062134, "step": 1410 }, { "epoch": 0.18, "eval_logits/chosen": -2.614816665649414, "eval_logits/rejected": -2.629605770111084, "eval_logps/chosen": -400.4852600097656, "eval_logps/rejected": -387.1134948730469, "eval_loss": 0.6440988183021545, "eval_rewards/accuracies": 0.6274999976158142, "eval_rewards/chosen": -0.6773582100868225, "eval_rewards/margins": 0.18267665803432465, "eval_rewards/rejected": -0.8600347638130188, "eval_runtime": 197.1086, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.073, "step": 1410 }, { "epoch": 0.19, "learning_rate": 4.888883912440642e-06, "logits/chosen": -2.8805582523345947, "logits/rejected": -2.9014639854431152, "logps/chosen": -458.44256591796875, "logps/rejected": -453.98486328125, "loss": 0.6384, "rewards/accuracies": 0.625, "rewards/chosen": -0.7284379005432129, "rewards/margins": 0.20983977615833282, "rewards/rejected": -0.9382778406143188, "step": 1420 }, { "epoch": 0.19, "eval_logits/chosen": -2.617636203765869, "eval_logits/rejected": -2.6321375370025635, "eval_logps/chosen": -399.8628845214844, "eval_logps/rejected": -386.35150146484375, "eval_loss": 0.6433753371238708, "eval_rewards/accuracies": 0.628000020980835, "eval_rewards/chosen": -0.6711348295211792, "eval_rewards/margins": 0.1812804937362671, "eval_rewards/rejected": -0.8524152636528015, "eval_runtime": 196.9731, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 1420 }, { "epoch": 0.19, "learning_rate": 4.885491492203688e-06, "logits/chosen": -2.8176732063293457, "logits/rejected": -2.8348517417907715, "logps/chosen": -400.16973876953125, "logps/rejected": -385.3367614746094, "loss": 0.6132, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.603407084941864, "rewards/margins": 0.2256653755903244, "rewards/rejected": -0.8290724754333496, "step": 1430 }, { "epoch": 0.19, "eval_logits/chosen": -2.6181766986846924, "eval_logits/rejected": -2.632568359375, "eval_logps/chosen": -403.1084289550781, "eval_logps/rejected": -390.0040588378906, "eval_loss": 0.6431609988212585, "eval_rewards/accuracies": 0.6269999742507935, "eval_rewards/chosen": -0.703589916229248, "eval_rewards/margins": 0.18535077571868896, "eval_rewards/rejected": -0.8889405727386475, "eval_runtime": 196.9319, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 1430 }, { "epoch": 0.19, "learning_rate": 4.882049274743578e-06, "logits/chosen": -2.9042248725891113, "logits/rejected": -2.891632556915283, "logps/chosen": -448.39520263671875, "logps/rejected": -420.09332275390625, "loss": 0.6443, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.664592981338501, "rewards/margins": 0.17925769090652466, "rewards/rejected": -0.8438507318496704, "step": 1440 }, { "epoch": 0.19, "eval_logits/chosen": -2.6198770999908447, "eval_logits/rejected": -2.6345512866973877, "eval_logps/chosen": -403.4805908203125, "eval_logps/rejected": -390.4880676269531, "eval_loss": 0.6423071622848511, "eval_rewards/accuracies": 0.6265000104904175, "eval_rewards/chosen": -0.7073121070861816, "eval_rewards/margins": 0.18646840751171112, "eval_rewards/rejected": -0.893780529499054, "eval_runtime": 196.9843, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 1440 }, { "epoch": 0.19, "learning_rate": 4.878557331916729e-06, "logits/chosen": -2.8701610565185547, "logits/rejected": -2.8831980228424072, "logps/chosen": -390.12823486328125, "logps/rejected": -377.2284240722656, "loss": 0.615, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.6979155540466309, "rewards/margins": 0.23393838107585907, "rewards/rejected": -0.9318540692329407, "step": 1450 }, { "epoch": 0.19, "eval_logits/chosen": -2.6227636337280273, "eval_logits/rejected": -2.6376028060913086, "eval_logps/chosen": -401.7284851074219, "eval_logps/rejected": -388.5261535644531, "eval_loss": 0.6419389843940735, "eval_rewards/accuracies": 0.6269999742507935, "eval_rewards/chosen": -0.689790666103363, "eval_rewards/margins": 0.18437045812606812, "eval_rewards/rejected": -0.8741611242294312, "eval_runtime": 196.9963, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 1450 }, { "epoch": 0.19, "learning_rate": 4.875015736617576e-06, "logits/chosen": -2.7935924530029297, "logits/rejected": -2.766704559326172, "logps/chosen": -483.1861877441406, "logps/rejected": -444.1046447753906, "loss": 0.6368, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.717400074005127, "rewards/margins": 0.21793465316295624, "rewards/rejected": -0.9353348016738892, "step": 1460 }, { "epoch": 0.19, "eval_logits/chosen": -2.612457036972046, "eval_logits/rejected": -2.628230333328247, "eval_logps/chosen": -404.81500244140625, "eval_logps/rejected": -392.3670654296875, "eval_loss": 0.6420219540596008, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": -0.7206559777259827, "eval_rewards/margins": 0.19191448390483856, "eval_rewards/rejected": -0.9125705361366272, "eval_runtime": 196.8079, "eval_samples_per_second": 10.162, "eval_steps_per_second": 5.081, "step": 1460 }, { "epoch": 0.19, "learning_rate": 4.8714245627770515e-06, "logits/chosen": -2.8471336364746094, "logits/rejected": -2.8089940547943115, "logps/chosen": -383.68597412109375, "logps/rejected": -341.060546875, "loss": 0.6896, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.7236995697021484, "rewards/margins": 0.07411099970340729, "rewards/rejected": -0.7978106141090393, "step": 1470 }, { "epoch": 0.19, "eval_logits/chosen": -2.614165782928467, "eval_logits/rejected": -2.629824638366699, "eval_logps/chosen": -401.9931945800781, "eval_logps/rejected": -389.17608642578125, "eval_loss": 0.6410108804702759, "eval_rewards/accuracies": 0.6320000290870667, "eval_rewards/chosen": -0.6924377083778381, "eval_rewards/margins": 0.18822318315505981, "eval_rewards/rejected": -0.8806608319282532, "eval_runtime": 196.7423, "eval_samples_per_second": 10.166, "eval_steps_per_second": 5.083, "step": 1470 }, { "epoch": 0.19, "learning_rate": 4.8677838853610445e-06, "logits/chosen": -2.7825706005096436, "logits/rejected": -2.798952341079712, "logps/chosen": -395.2989501953125, "logps/rejected": -353.34814453125, "loss": 0.6412, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.6694716215133667, "rewards/margins": 0.1773361712694168, "rewards/rejected": -0.8468077778816223, "step": 1480 }, { "epoch": 0.19, "eval_logits/chosen": -2.6154189109802246, "eval_logits/rejected": -2.630645990371704, "eval_logps/chosen": -406.365478515625, "eval_logps/rejected": -393.8523864746094, "eval_loss": 0.6411867737770081, "eval_rewards/accuracies": 0.6294999718666077, "eval_rewards/chosen": -0.736160397529602, "eval_rewards/margins": 0.19126297533512115, "eval_rewards/rejected": -0.927423357963562, "eval_runtime": 196.9114, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.078, "step": 1480 }, { "epoch": 0.19, "learning_rate": 4.864093780368828e-06, "logits/chosen": -2.8738656044006348, "logits/rejected": -2.8321421146392822, "logps/chosen": -440.78955078125, "logps/rejected": -383.57940673828125, "loss": 0.6064, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.676841139793396, "rewards/margins": 0.2552284896373749, "rewards/rejected": -0.9320695996284485, "step": 1490 }, { "epoch": 0.19, "eval_logits/chosen": -2.6182310581207275, "eval_logits/rejected": -2.6331787109375, "eval_logps/chosen": -409.4355773925781, "eval_logps/rejected": -397.0263366699219, "eval_loss": 0.6417971849441528, "eval_rewards/accuracies": 0.6244999766349792, "eval_rewards/chosen": -0.7668612003326416, "eval_rewards/margins": 0.192301943898201, "eval_rewards/rejected": -0.9591631293296814, "eval_runtime": 196.9899, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.076, "step": 1490 }, { "epoch": 0.2, "learning_rate": 4.860354324831482e-06, "logits/chosen": -2.844330072402954, "logits/rejected": -2.829576015472412, "logps/chosen": -404.71185302734375, "logps/rejected": -419.93682861328125, "loss": 0.6325, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.7922731637954712, "rewards/margins": 0.1927259862422943, "rewards/rejected": -0.9849990010261536, "step": 1500 }, { "epoch": 0.2, "eval_logits/chosen": -2.621196985244751, "eval_logits/rejected": -2.6360583305358887, "eval_logps/chosen": -409.8640441894531, "eval_logps/rejected": -397.48095703125, "eval_loss": 0.6417466402053833, "eval_rewards/accuracies": 0.6284999847412109, "eval_rewards/chosen": -0.7711459994316101, "eval_rewards/margins": 0.19256363809108734, "eval_rewards/rejected": -0.9637096524238586, "eval_runtime": 197.4747, "eval_samples_per_second": 10.128, "eval_steps_per_second": 5.064, "step": 1500 }, { "epoch": 0.2, "learning_rate": 4.856565596810279e-06, "logits/chosen": -2.851569652557373, "logits/rejected": -2.8237807750701904, "logps/chosen": -342.65606689453125, "logps/rejected": -379.66656494140625, "loss": 0.6424, "rewards/accuracies": 0.625, "rewards/chosen": -0.7834426164627075, "rewards/margins": 0.1825077384710312, "rewards/rejected": -0.9659503698348999, "step": 1510 }, { "epoch": 0.2, "eval_logits/chosen": -2.6154792308807373, "eval_logits/rejected": -2.6306729316711426, "eval_logps/chosen": -406.4837646484375, "eval_logps/rejected": -394.25555419921875, "eval_loss": 0.6402400135993958, "eval_rewards/accuracies": 0.6320000290870667, "eval_rewards/chosen": -0.7373436093330383, "eval_rewards/margins": 0.19411173462867737, "eval_rewards/rejected": -0.9314553141593933, "eval_runtime": 197.2548, "eval_samples_per_second": 10.139, "eval_steps_per_second": 5.07, "step": 1510 }, { "epoch": 0.2, "learning_rate": 4.852727675395056e-06, "logits/chosen": -2.8235487937927246, "logits/rejected": -2.819708824157715, "logps/chosen": -392.69329833984375, "logps/rejected": -371.3106384277344, "loss": 0.5892, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6893141269683838, "rewards/margins": 0.29039710760116577, "rewards/rejected": -0.9797111749649048, "step": 1520 }, { "epoch": 0.2, "eval_logits/chosen": -2.603848695755005, "eval_logits/rejected": -2.6197257041931152, "eval_logps/chosen": -412.2532043457031, "eval_logps/rejected": -401.0218505859375, "eval_loss": 0.6410880088806152, "eval_rewards/accuracies": 0.6355000138282776, "eval_rewards/chosen": -0.7950379848480225, "eval_rewards/margins": 0.20408010482788086, "eval_rewards/rejected": -0.9991180300712585, "eval_runtime": 197.1794, "eval_samples_per_second": 10.143, "eval_steps_per_second": 5.072, "step": 1520 }, { "epoch": 0.2, "learning_rate": 4.848840640702565e-06, "logits/chosen": -2.860694408416748, "logits/rejected": -2.8731682300567627, "logps/chosen": -388.0919189453125, "logps/rejected": -359.65045166015625, "loss": 0.7037, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.8457175493240356, "rewards/margins": 0.07708420604467392, "rewards/rejected": -0.9228017926216125, "step": 1530 }, { "epoch": 0.2, "eval_logits/chosen": -2.6086678504943848, "eval_logits/rejected": -2.6243414878845215, "eval_logps/chosen": -410.0699768066406, "eval_logps/rejected": -398.4937744140625, "eval_loss": 0.6403050422668457, "eval_rewards/accuracies": 0.6340000033378601, "eval_rewards/chosen": -0.7732056975364685, "eval_rewards/margins": 0.2006317377090454, "eval_rewards/rejected": -0.9738374948501587, "eval_runtime": 197.0745, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 1530 }, { "epoch": 0.2, "learning_rate": 4.844904573874798e-06, "logits/chosen": -2.774444103240967, "logits/rejected": -2.805631160736084, "logps/chosen": -408.6401062011719, "logps/rejected": -373.6168518066406, "loss": 0.6159, "rewards/accuracies": 0.625, "rewards/chosen": -0.6792012453079224, "rewards/margins": 0.25484994053840637, "rewards/rejected": -0.9340512156486511, "step": 1540 }, { "epoch": 0.2, "eval_logits/chosen": -2.6074650287628174, "eval_logits/rejected": -2.622997522354126, "eval_logps/chosen": -405.1261901855469, "eval_logps/rejected": -393.0093078613281, "eval_loss": 0.6390379071235657, "eval_rewards/accuracies": 0.6340000033378601, "eval_rewards/chosen": -0.7237675786018372, "eval_rewards/margins": 0.19522573053836823, "eval_rewards/rejected": -0.9189932942390442, "eval_runtime": 196.8614, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.08, "step": 1540 }, { "epoch": 0.2, "learning_rate": 4.840919557077297e-06, "logits/chosen": -2.831430435180664, "logits/rejected": -2.780000686645508, "logps/chosen": -406.05914306640625, "logps/rejected": -365.9083557128906, "loss": 0.6365, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6498536467552185, "rewards/margins": 0.18074217438697815, "rewards/rejected": -0.8305959701538086, "step": 1550 }, { "epoch": 0.2, "eval_logits/chosen": -2.605069398880005, "eval_logits/rejected": -2.6206929683685303, "eval_logps/chosen": -405.4744873046875, "eval_logps/rejected": -393.349853515625, "eval_loss": 0.6390611529350281, "eval_rewards/accuracies": 0.6370000243186951, "eval_rewards/chosen": -0.7272511720657349, "eval_rewards/margins": 0.19514717161655426, "eval_rewards/rejected": -0.9223982691764832, "eval_runtime": 196.8733, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.079, "step": 1550 }, { "epoch": 0.2, "learning_rate": 4.836885673497435e-06, "logits/chosen": -2.8119847774505615, "logits/rejected": -2.7871992588043213, "logps/chosen": -415.7240295410156, "logps/rejected": -404.88671875, "loss": 0.6055, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.7152377963066101, "rewards/margins": 0.26963186264038086, "rewards/rejected": -0.9848695993423462, "step": 1560 }, { "epoch": 0.2, "eval_logits/chosen": -2.594703435897827, "eval_logits/rejected": -2.611009359359741, "eval_logps/chosen": -406.0096130371094, "eval_logps/rejected": -394.2928771972656, "eval_loss": 0.6390554308891296, "eval_rewards/accuracies": 0.6334999799728394, "eval_rewards/chosen": -0.7326022982597351, "eval_rewards/margins": 0.19922657310962677, "eval_rewards/rejected": -0.9318288564682007, "eval_runtime": 197.1571, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 1560 }, { "epoch": 0.21, "learning_rate": 4.832803007342679e-06, "logits/chosen": -2.81030011177063, "logits/rejected": -2.7911148071289062, "logps/chosen": -373.5116271972656, "logps/rejected": -403.6263122558594, "loss": 0.6129, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.7071263194084167, "rewards/margins": 0.2654086947441101, "rewards/rejected": -0.9725350141525269, "step": 1570 }, { "epoch": 0.21, "eval_logits/chosen": -2.581125497817993, "eval_logits/rejected": -2.598928928375244, "eval_logps/chosen": -403.1785583496094, "eval_logps/rejected": -391.4497375488281, "eval_loss": 0.6401770114898682, "eval_rewards/accuracies": 0.6359999775886536, "eval_rewards/chosen": -0.7042912840843201, "eval_rewards/margins": 0.19910559058189392, "eval_rewards/rejected": -0.9033968448638916, "eval_runtime": 197.0534, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 1570 }, { "epoch": 0.21, "learning_rate": 4.828671643838839e-06, "logits/chosen": -2.712752103805542, "logits/rejected": -2.713848829269409, "logps/chosen": -387.67559814453125, "logps/rejected": -354.3922119140625, "loss": 0.6286, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.6284931302070618, "rewards/margins": 0.22074835002422333, "rewards/rejected": -0.8492414355278015, "step": 1580 }, { "epoch": 0.21, "eval_logits/chosen": -2.5615954399108887, "eval_logits/rejected": -2.5810999870300293, "eval_logps/chosen": -411.4100036621094, "eval_logps/rejected": -401.1250915527344, "eval_loss": 0.6424925923347473, "eval_rewards/accuracies": 0.6380000114440918, "eval_rewards/chosen": -0.786605715751648, "eval_rewards/margins": 0.2135448008775711, "eval_rewards/rejected": -1.0001505613327026, "eval_runtime": 197.1811, "eval_samples_per_second": 10.143, "eval_steps_per_second": 5.071, "step": 1580 }, { "epoch": 0.21, "learning_rate": 4.824491669228279e-06, "logits/chosen": -2.6709794998168945, "logits/rejected": -2.7086164951324463, "logps/chosen": -367.11236572265625, "logps/rejected": -356.0587463378906, "loss": 0.6803, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7548516988754272, "rewards/margins": 0.10954463481903076, "rewards/rejected": -0.8643962740898132, "step": 1590 }, { "epoch": 0.21, "eval_logits/chosen": -2.560438632965088, "eval_logits/rejected": -2.580240249633789, "eval_logps/chosen": -416.12518310546875, "eval_logps/rejected": -406.64556884765625, "eval_loss": 0.6424650549888611, "eval_rewards/accuracies": 0.637499988079071, "eval_rewards/chosen": -0.8337578177452087, "eval_rewards/margins": 0.22159793972969055, "eval_rewards/rejected": -1.0553555488586426, "eval_runtime": 196.9846, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 1590 }, { "epoch": 0.21, "learning_rate": 4.8202631707681245e-06, "logits/chosen": -2.7330760955810547, "logits/rejected": -2.6766715049743652, "logps/chosen": -382.569091796875, "logps/rejected": -383.3133850097656, "loss": 0.6037, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.8243730664253235, "rewards/margins": 0.2924764156341553, "rewards/rejected": -1.1168495416641235, "step": 1600 }, { "epoch": 0.21, "eval_logits/chosen": -2.5603301525115967, "eval_logits/rejected": -2.580162286758423, "eval_logps/chosen": -415.4783630371094, "eval_logps/rejected": -405.836181640625, "eval_loss": 0.6421064734458923, "eval_rewards/accuracies": 0.6399999856948853, "eval_rewards/chosen": -0.8272896409034729, "eval_rewards/margins": 0.21997201442718506, "eval_rewards/rejected": -1.0472615957260132, "eval_runtime": 197.0669, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.074, "step": 1600 }, { "epoch": 0.21, "learning_rate": 4.815986236728437e-06, "logits/chosen": -2.7097089290618896, "logits/rejected": -2.7386956214904785, "logps/chosen": -411.9666442871094, "logps/rejected": -413.4825134277344, "loss": 0.7021, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.9289957284927368, "rewards/margins": 0.10253496468067169, "rewards/rejected": -1.031530737876892, "step": 1610 }, { "epoch": 0.21, "eval_logits/chosen": -2.5622901916503906, "eval_logits/rejected": -2.5816233158111572, "eval_logps/chosen": -403.5919494628906, "eval_logps/rejected": -392.15234375, "eval_loss": 0.642052173614502, "eval_rewards/accuracies": 0.6340000033378601, "eval_rewards/chosen": -0.7084251046180725, "eval_rewards/margins": 0.20199787616729736, "eval_rewards/rejected": -0.9104229807853699, "eval_runtime": 196.9541, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 1610 }, { "epoch": 0.21, "learning_rate": 4.811660956390372e-06, "logits/chosen": -2.777519702911377, "logits/rejected": -2.7863945960998535, "logps/chosen": -442.59698486328125, "logps/rejected": -414.566650390625, "loss": 0.6388, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.674359142780304, "rewards/margins": 0.1822533905506134, "rewards/rejected": -0.8566125631332397, "step": 1620 }, { "epoch": 0.21, "eval_logits/chosen": -2.5668702125549316, "eval_logits/rejected": -2.5854568481445312, "eval_logps/chosen": -398.8391418457031, "eval_logps/rejected": -386.6023254394531, "eval_loss": 0.641032874584198, "eval_rewards/accuracies": 0.6315000057220459, "eval_rewards/chosen": -0.6608973145484924, "eval_rewards/margins": 0.194025918841362, "eval_rewards/rejected": -0.8549233078956604, "eval_runtime": 197.3348, "eval_samples_per_second": 10.135, "eval_steps_per_second": 5.068, "step": 1620 }, { "epoch": 0.21, "learning_rate": 4.807287420044319e-06, "logits/chosen": -2.8177196979522705, "logits/rejected": -2.841592311859131, "logps/chosen": -351.3625183105469, "logps/rejected": -361.21868896484375, "loss": 0.5835, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6561750173568726, "rewards/margins": 0.33405548334121704, "rewards/rejected": -0.9902304410934448, "step": 1630 }, { "epoch": 0.21, "eval_logits/chosen": -2.559098958969116, "eval_logits/rejected": -2.577807664871216, "eval_logps/chosen": -405.84942626953125, "eval_logps/rejected": -394.82330322265625, "eval_loss": 0.6402274370193481, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": -0.730999767780304, "eval_rewards/margins": 0.2061331868171692, "eval_rewards/rejected": -0.9371330738067627, "eval_runtime": 197.2599, "eval_samples_per_second": 10.139, "eval_steps_per_second": 5.069, "step": 1630 }, { "epoch": 0.21, "learning_rate": 4.802865718988008e-06, "logits/chosen": -2.748746633529663, "logits/rejected": -2.730214834213257, "logps/chosen": -355.8330993652344, "logps/rejected": -422.69281005859375, "loss": 0.6083, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7712303996086121, "rewards/margins": 0.3000728189945221, "rewards/rejected": -1.0713032484054565, "step": 1640 }, { "epoch": 0.21, "eval_logits/chosen": -2.5498390197753906, "eval_logits/rejected": -2.5689785480499268, "eval_logps/chosen": -413.0655517578125, "eval_logps/rejected": -403.24359130859375, "eval_loss": 0.6408534646034241, "eval_rewards/accuracies": 0.6340000033378601, "eval_rewards/chosen": -0.8031615614891052, "eval_rewards/margins": 0.21817424893379211, "eval_rewards/rejected": -1.0213358402252197, "eval_runtime": 197.1474, "eval_samples_per_second": 10.145, "eval_steps_per_second": 5.072, "step": 1640 }, { "epoch": 0.22, "learning_rate": 4.798395945524615e-06, "logits/chosen": -2.8017356395721436, "logits/rejected": -2.8132927417755127, "logps/chosen": -401.31146240234375, "logps/rejected": -392.885986328125, "loss": 0.6022, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.7307563424110413, "rewards/margins": 0.2870885729789734, "rewards/rejected": -1.0178449153900146, "step": 1650 }, { "epoch": 0.22, "eval_logits/chosen": -2.5515244007110596, "eval_logits/rejected": -2.5709784030914307, "eval_logps/chosen": -419.0445861816406, "eval_logps/rejected": -410.1859436035156, "eval_loss": 0.6414780616760254, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": -0.8629518151283264, "eval_rewards/margins": 0.2278074324131012, "eval_rewards/rejected": -1.09075927734375, "eval_runtime": 197.2586, "eval_samples_per_second": 10.139, "eval_steps_per_second": 5.069, "step": 1650 }, { "epoch": 0.22, "learning_rate": 4.793878192960823e-06, "logits/chosen": -2.798947811126709, "logits/rejected": -2.8003056049346924, "logps/chosen": -469.7757873535156, "logps/rejected": -475.6380920410156, "loss": 0.6203, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.8466376066207886, "rewards/margins": 0.2910873293876648, "rewards/rejected": -1.1377251148223877, "step": 1660 }, { "epoch": 0.22, "eval_logits/chosen": -2.5631649494171143, "eval_logits/rejected": -2.5823311805725098, "eval_logps/chosen": -417.77740478515625, "eval_logps/rejected": -408.8164978027344, "eval_loss": 0.640652596950531, "eval_rewards/accuracies": 0.6370000243186951, "eval_rewards/chosen": -0.8502798676490784, "eval_rewards/margins": 0.22678521275520325, "eval_rewards/rejected": -1.0770649909973145, "eval_runtime": 197.2142, "eval_samples_per_second": 10.141, "eval_steps_per_second": 5.071, "step": 1660 }, { "epoch": 0.22, "learning_rate": 4.789312555604887e-06, "logits/chosen": -2.800078868865967, "logits/rejected": -2.7635836601257324, "logps/chosen": -386.3752136230469, "logps/rejected": -376.82330322265625, "loss": 0.6444, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.8182946443557739, "rewards/margins": 0.21323814988136292, "rewards/rejected": -1.0315327644348145, "step": 1670 }, { "epoch": 0.22, "eval_logits/chosen": -2.573129415512085, "eval_logits/rejected": -2.5919148921966553, "eval_logps/chosen": -416.8188781738281, "eval_logps/rejected": -407.67938232421875, "eval_loss": 0.6396322250366211, "eval_rewards/accuracies": 0.6355000138282776, "eval_rewards/chosen": -0.8406945466995239, "eval_rewards/margins": 0.22499865293502808, "eval_rewards/rejected": -1.0656932592391968, "eval_runtime": 197.0109, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 1670 }, { "epoch": 0.22, "learning_rate": 4.784699128764654e-06, "logits/chosen": -2.8030784130096436, "logits/rejected": -2.8131110668182373, "logps/chosen": -383.93353271484375, "logps/rejected": -387.96978759765625, "loss": 0.6084, "rewards/accuracies": 0.625, "rewards/chosen": -0.7460604906082153, "rewards/margins": 0.316192626953125, "rewards/rejected": -1.0622531175613403, "step": 1680 }, { "epoch": 0.22, "eval_logits/chosen": -2.5807809829711914, "eval_logits/rejected": -2.5989623069763184, "eval_logps/chosen": -413.2848815917969, "eval_logps/rejected": -403.7291564941406, "eval_loss": 0.638802170753479, "eval_rewards/accuracies": 0.6380000114440918, "eval_rewards/chosen": -0.8053548336029053, "eval_rewards/margins": 0.22083649039268494, "eval_rewards/rejected": -1.026191234588623, "eval_runtime": 196.9795, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 1680 }, { "epoch": 0.22, "learning_rate": 4.780038008745581e-06, "logits/chosen": -2.791762351989746, "logits/rejected": -2.80530047416687, "logps/chosen": -440.07928466796875, "logps/rejected": -404.647216796875, "loss": 0.6797, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.8743046522140503, "rewards/margins": 0.13973672688007355, "rewards/rejected": -1.0140413045883179, "step": 1690 }, { "epoch": 0.22, "eval_logits/chosen": -2.58063006401062, "eval_logits/rejected": -2.5980546474456787, "eval_logps/chosen": -415.2863464355469, "eval_logps/rejected": -405.9435119628906, "eval_loss": 0.6387109160423279, "eval_rewards/accuracies": 0.640999972820282, "eval_rewards/chosen": -0.825369119644165, "eval_rewards/margins": 0.22296535968780518, "eval_rewards/rejected": -1.0483345985412598, "eval_runtime": 197.0549, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 1690 }, { "epoch": 0.22, "learning_rate": 4.775329292848721e-06, "logits/chosen": -2.744279384613037, "logits/rejected": -2.7326884269714355, "logps/chosen": -434.35479736328125, "logps/rejected": -432.7107849121094, "loss": 0.6111, "rewards/accuracies": 0.625, "rewards/chosen": -0.7824715375900269, "rewards/margins": 0.2711241543292999, "rewards/rejected": -1.053595781326294, "step": 1700 }, { "epoch": 0.22, "eval_logits/chosen": -2.5749504566192627, "eval_logits/rejected": -2.591935157775879, "eval_logps/chosen": -414.1108703613281, "eval_logps/rejected": -404.650634765625, "eval_loss": 0.6384560465812683, "eval_rewards/accuracies": 0.6389999985694885, "eval_rewards/chosen": -0.8136144280433655, "eval_rewards/margins": 0.22179150581359863, "eval_rewards/rejected": -1.0354059934616089, "eval_runtime": 197.0131, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 1700 }, { "epoch": 0.22, "learning_rate": 4.770573079368691e-06, "logits/chosen": -2.7748918533325195, "logits/rejected": -2.78712797164917, "logps/chosen": -386.3089294433594, "logps/rejected": -385.2343444824219, "loss": 0.6356, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.738974392414093, "rewards/margins": 0.2040799856185913, "rewards/rejected": -0.9430543184280396, "step": 1710 }, { "epoch": 0.22, "eval_logits/chosen": -2.5670783519744873, "eval_logits/rejected": -2.583617925643921, "eval_logps/chosen": -414.2315673828125, "eval_logps/rejected": -404.7968444824219, "eval_loss": 0.6393074989318848, "eval_rewards/accuracies": 0.6340000033378601, "eval_rewards/chosen": -0.8148214221000671, "eval_rewards/margins": 0.22204671800136566, "eval_rewards/rejected": -1.0368682146072388, "eval_runtime": 197.153, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 1710 }, { "epoch": 0.23, "learning_rate": 4.765769467591626e-06, "logits/chosen": -2.85074520111084, "logits/rejected": -2.835679769515991, "logps/chosen": -432.10015869140625, "logps/rejected": -445.62615966796875, "loss": 0.5897, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7564738988876343, "rewards/margins": 0.3326117694377899, "rewards/rejected": -1.0890856981277466, "step": 1720 }, { "epoch": 0.23, "eval_logits/chosen": -2.5621254444122314, "eval_logits/rejected": -2.5779037475585938, "eval_logps/chosen": -415.5030517578125, "eval_logps/rejected": -406.2257080078125, "eval_loss": 0.6412656307220459, "eval_rewards/accuracies": 0.6349999904632568, "eval_rewards/chosen": -0.8275365829467773, "eval_rewards/margins": 0.22362031042575836, "eval_rewards/rejected": -1.0511568784713745, "eval_runtime": 197.046, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 1720 }, { "epoch": 0.23, "learning_rate": 4.760918557793096e-06, "logits/chosen": -2.8191890716552734, "logits/rejected": -2.869262933731079, "logps/chosen": -387.35528564453125, "logps/rejected": -415.16461181640625, "loss": 0.6146, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7838236093521118, "rewards/margins": 0.272321879863739, "rewards/rejected": -1.056145429611206, "step": 1730 }, { "epoch": 0.23, "eval_logits/chosen": -2.558769702911377, "eval_logits/rejected": -2.5744855403900146, "eval_logps/chosen": -418.4196472167969, "eval_logps/rejected": -409.6731262207031, "eval_loss": 0.6429142951965332, "eval_rewards/accuracies": 0.6340000033378601, "eval_rewards/chosen": -0.8567026853561401, "eval_rewards/margins": 0.2289285808801651, "eval_rewards/rejected": -1.0856313705444336, "eval_runtime": 197.0094, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 1730 }, { "epoch": 0.23, "learning_rate": 4.756020451236025e-06, "logits/chosen": -2.7810559272766113, "logits/rejected": -2.7768383026123047, "logps/chosen": -457.4143981933594, "logps/rejected": -444.75103759765625, "loss": 0.6418, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.8151463270187378, "rewards/margins": 0.20836929976940155, "rewards/rejected": -1.0235155820846558, "step": 1740 }, { "epoch": 0.23, "eval_logits/chosen": -2.5561814308166504, "eval_logits/rejected": -2.5724422931671143, "eval_logps/chosen": -423.25457763671875, "eval_logps/rejected": -415.39715576171875, "eval_loss": 0.6437353491783142, "eval_rewards/accuracies": 0.6294999718666077, "eval_rewards/chosen": -0.9050517082214355, "eval_rewards/margins": 0.23781974613666534, "eval_rewards/rejected": -1.142871618270874, "eval_runtime": 197.0793, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 1740 }, { "epoch": 0.23, "learning_rate": 4.751075250168569e-06, "logits/chosen": -2.835005044937134, "logits/rejected": -2.7781484127044678, "logps/chosen": -424.66680908203125, "logps/rejected": -400.25689697265625, "loss": 0.6322, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.9134689569473267, "rewards/margins": 0.2810933589935303, "rewards/rejected": -1.194562315940857, "step": 1750 }, { "epoch": 0.23, "eval_logits/chosen": -2.5711569786071777, "eval_logits/rejected": -2.58683180809021, "eval_logps/chosen": -418.1545715332031, "eval_logps/rejected": -409.7334899902344, "eval_loss": 0.6412755846977234, "eval_rewards/accuracies": 0.6340000033378601, "eval_rewards/chosen": -0.8540514707565308, "eval_rewards/margins": 0.2321833074092865, "eval_rewards/rejected": -1.0862348079681396, "eval_runtime": 197.233, "eval_samples_per_second": 10.14, "eval_steps_per_second": 5.07, "step": 1750 }, { "epoch": 0.23, "learning_rate": 4.746083057821981e-06, "logits/chosen": -2.772454023361206, "logits/rejected": -2.716813802719116, "logps/chosen": -393.3017578125, "logps/rejected": -371.563720703125, "loss": 0.628, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.802009105682373, "rewards/margins": 0.30267855525016785, "rewards/rejected": -1.1046876907348633, "step": 1760 }, { "epoch": 0.23, "eval_logits/chosen": -2.6006784439086914, "eval_logits/rejected": -2.6152594089508057, "eval_logps/chosen": -411.67431640625, "eval_logps/rejected": -402.28314208984375, "eval_loss": 0.6368669867515564, "eval_rewards/accuracies": 0.6309999823570251, "eval_rewards/chosen": -0.7892491221427917, "eval_rewards/margins": 0.22248202562332153, "eval_rewards/rejected": -1.0117310285568237, "eval_runtime": 197.0626, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 1760 }, { "epoch": 0.23, "learning_rate": 4.741043978408463e-06, "logits/chosen": -2.781284809112549, "logits/rejected": -2.7620162963867188, "logps/chosen": -382.1107177734375, "logps/rejected": -418.96221923828125, "loss": 0.5509, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6648024916648865, "rewards/margins": 0.4300464689731598, "rewards/rejected": -1.0948489904403687, "step": 1770 }, { "epoch": 0.23, "eval_logits/chosen": -2.604132652282715, "eval_logits/rejected": -2.619030475616455, "eval_logps/chosen": -411.5599365234375, "eval_logps/rejected": -402.39544677734375, "eval_loss": 0.6368661522865295, "eval_rewards/accuracies": 0.6290000081062317, "eval_rewards/chosen": -0.7881054282188416, "eval_rewards/margins": 0.22474880516529083, "eval_rewards/rejected": -1.0128542184829712, "eval_runtime": 197.048, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 1770 }, { "epoch": 0.23, "learning_rate": 4.735958117118983e-06, "logits/chosen": -2.8062703609466553, "logits/rejected": -2.8215584754943848, "logps/chosen": -431.8072204589844, "logps/rejected": -423.52813720703125, "loss": 0.5959, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6866645812988281, "rewards/margins": 0.31908783316612244, "rewards/rejected": -1.0057523250579834, "step": 1780 }, { "epoch": 0.23, "eval_logits/chosen": -2.6018896102905273, "eval_logits/rejected": -2.617478847503662, "eval_logps/chosen": -407.46307373046875, "eval_logps/rejected": -397.8551940917969, "eval_loss": 0.6371034979820251, "eval_rewards/accuracies": 0.6294999718666077, "eval_rewards/chosen": -0.747136116027832, "eval_rewards/margins": 0.2203156054019928, "eval_rewards/rejected": -0.9674516320228577, "eval_runtime": 196.9701, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 1780 }, { "epoch": 0.23, "learning_rate": 4.730825580121084e-06, "logits/chosen": -2.8423948287963867, "logits/rejected": -2.8654932975769043, "logps/chosen": -381.26007080078125, "logps/rejected": -395.092529296875, "loss": 0.6137, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7086172699928284, "rewards/margins": 0.26496243476867676, "rewards/rejected": -0.9735797047615051, "step": 1790 }, { "epoch": 0.23, "eval_logits/chosen": -2.5999481678009033, "eval_logits/rejected": -2.615683078765869, "eval_logps/chosen": -411.8017272949219, "eval_logps/rejected": -402.99560546875, "eval_loss": 0.6369568109512329, "eval_rewards/accuracies": 0.6265000104904175, "eval_rewards/chosen": -0.7905230522155762, "eval_rewards/margins": 0.22833256423473358, "eval_rewards/rejected": -1.0188556909561157, "eval_runtime": 197.0514, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 1790 }, { "epoch": 0.24, "learning_rate": 4.725646474556666e-06, "logits/chosen": -2.830599069595337, "logits/rejected": -2.8327298164367676, "logps/chosen": -359.54388427734375, "logps/rejected": -399.54827880859375, "loss": 0.6518, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.7748910188674927, "rewards/margins": 0.28433313965797424, "rewards/rejected": -1.059224247932434, "step": 1800 }, { "epoch": 0.24, "eval_logits/chosen": -2.5959720611572266, "eval_logits/rejected": -2.612139940261841, "eval_logps/chosen": -415.753662109375, "eval_logps/rejected": -407.6683349609375, "eval_loss": 0.6366816759109497, "eval_rewards/accuracies": 0.6309999823570251, "eval_rewards/chosen": -0.8300423622131348, "eval_rewards/margins": 0.23554080724716187, "eval_rewards/rejected": -1.0655831098556519, "eval_runtime": 196.8886, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 1800 }, { "epoch": 0.24, "learning_rate": 4.720420908539748e-06, "logits/chosen": -2.840127468109131, "logits/rejected": -2.816035509109497, "logps/chosen": -392.6610107421875, "logps/rejected": -403.17266845703125, "loss": 0.6444, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.8145529627799988, "rewards/margins": 0.18399588763713837, "rewards/rejected": -0.9985488653182983, "step": 1810 }, { "epoch": 0.24, "eval_logits/chosen": -2.5939080715179443, "eval_logits/rejected": -2.610529899597168, "eval_logps/chosen": -414.0361022949219, "eval_logps/rejected": -405.6206970214844, "eval_loss": 0.635891854763031, "eval_rewards/accuracies": 0.6315000057220459, "eval_rewards/chosen": -0.8128669857978821, "eval_rewards/margins": 0.23223945498466492, "eval_rewards/rejected": -1.0451064109802246, "eval_runtime": 196.9676, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 1810 }, { "epoch": 0.24, "learning_rate": 4.715148991154216e-06, "logits/chosen": -2.904259204864502, "logits/rejected": -2.9085910320281982, "logps/chosen": -504.03497314453125, "logps/rejected": -511.499755859375, "loss": 0.645, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.8387717008590698, "rewards/margins": 0.20440442860126495, "rewards/rejected": -1.0431760549545288, "step": 1820 }, { "epoch": 0.24, "eval_logits/chosen": -2.588139533996582, "eval_logits/rejected": -2.6048943996429443, "eval_logps/chosen": -413.64398193359375, "eval_logps/rejected": -405.2970886230469, "eval_loss": 0.63616943359375, "eval_rewards/accuracies": 0.6334999799728394, "eval_rewards/chosen": -0.80894535779953, "eval_rewards/margins": 0.2329251766204834, "eval_rewards/rejected": -1.0418705940246582, "eval_runtime": 197.0152, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.076, "step": 1820 }, { "epoch": 0.24, "learning_rate": 4.709830832451538e-06, "logits/chosen": -2.843167781829834, "logits/rejected": -2.848705768585205, "logps/chosen": -467.8697814941406, "logps/rejected": -467.73309326171875, "loss": 0.6188, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.8765204548835754, "rewards/margins": 0.2673446536064148, "rewards/rejected": -1.1438651084899902, "step": 1830 }, { "epoch": 0.24, "eval_logits/chosen": -2.5781211853027344, "eval_logits/rejected": -2.595245122909546, "eval_logps/chosen": -419.8053894042969, "eval_logps/rejected": -412.53533935546875, "eval_loss": 0.6371971368789673, "eval_rewards/accuracies": 0.6265000104904175, "eval_rewards/chosen": -0.8705599308013916, "eval_rewards/margins": 0.24369306862354279, "eval_rewards/rejected": -1.1142529249191284, "eval_runtime": 196.9225, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 1830 }, { "epoch": 0.24, "learning_rate": 4.704466543448477e-06, "logits/chosen": -2.710594654083252, "logits/rejected": -2.70381498336792, "logps/chosen": -495.4413146972656, "logps/rejected": -459.764404296875, "loss": 0.597, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.8346297144889832, "rewards/margins": 0.32266736030578613, "rewards/rejected": -1.1572970151901245, "step": 1840 }, { "epoch": 0.24, "eval_logits/chosen": -2.572765350341797, "eval_logits/rejected": -2.590297222137451, "eval_logps/chosen": -416.5643005371094, "eval_logps/rejected": -409.066650390625, "eval_loss": 0.6378411650657654, "eval_rewards/accuracies": 0.6244999766349792, "eval_rewards/chosen": -0.8381485939025879, "eval_rewards/margins": 0.24141810834407806, "eval_rewards/rejected": -1.0795667171478271, "eval_runtime": 197.3294, "eval_samples_per_second": 10.135, "eval_steps_per_second": 5.068, "step": 1840 }, { "epoch": 0.24, "learning_rate": 4.699056236124762e-06, "logits/chosen": -2.7791919708251953, "logits/rejected": -2.8077704906463623, "logps/chosen": -398.36260986328125, "logps/rejected": -419.29071044921875, "loss": 0.6169, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7714325189590454, "rewards/margins": 0.26381996273994446, "rewards/rejected": -1.0352524518966675, "step": 1850 }, { "epoch": 0.24, "eval_logits/chosen": -2.5729434490203857, "eval_logits/rejected": -2.590017080307007, "eval_logps/chosen": -417.69183349609375, "eval_logps/rejected": -410.3074645996094, "eval_loss": 0.6380077600479126, "eval_rewards/accuracies": 0.6255000233650208, "eval_rewards/chosen": -0.8494245409965515, "eval_rewards/margins": 0.24255014955997467, "eval_rewards/rejected": -1.0919746160507202, "eval_runtime": 197.0483, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 1850 }, { "epoch": 0.24, "learning_rate": 4.693600023420758e-06, "logits/chosen": -2.8519492149353027, "logits/rejected": -2.817288875579834, "logps/chosen": -445.31585693359375, "logps/rejected": -393.69781494140625, "loss": 0.5578, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.7918586730957031, "rewards/margins": 0.4510478973388672, "rewards/rejected": -1.2429064512252808, "step": 1860 }, { "epoch": 0.24, "eval_logits/chosen": -2.564570665359497, "eval_logits/rejected": -2.5810608863830566, "eval_logps/chosen": -421.4671630859375, "eval_logps/rejected": -414.6640625, "eval_loss": 0.6403253078460693, "eval_rewards/accuracies": 0.6255000233650208, "eval_rewards/chosen": -0.8871776461601257, "eval_rewards/margins": 0.24836279451847076, "eval_rewards/rejected": -1.1355403661727905, "eval_runtime": 197.0186, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.076, "step": 1860 }, { "epoch": 0.24, "learning_rate": 4.688098019235108e-06, "logits/chosen": -2.7748916149139404, "logits/rejected": -2.7554211616516113, "logps/chosen": -453.495361328125, "logps/rejected": -460.4736328125, "loss": 0.6017, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.8699310421943665, "rewards/margins": 0.34119826555252075, "rewards/rejected": -1.2111294269561768, "step": 1870 }, { "epoch": 0.24, "eval_logits/chosen": -2.5702080726623535, "eval_logits/rejected": -2.5860989093780518, "eval_logps/chosen": -423.2950134277344, "eval_logps/rejected": -416.7840576171875, "eval_loss": 0.6397432088851929, "eval_rewards/accuracies": 0.6244999766349792, "eval_rewards/chosen": -0.9054557085037231, "eval_rewards/margins": 0.25128448009490967, "eval_rewards/rejected": -1.1567401885986328, "eval_runtime": 197.0154, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.076, "step": 1870 }, { "epoch": 0.25, "learning_rate": 4.682550338422353e-06, "logits/chosen": -2.7921640872955322, "logits/rejected": -2.791607618331909, "logps/chosen": -424.34735107421875, "logps/rejected": -395.5057373046875, "loss": 0.6193, "rewards/accuracies": 0.625, "rewards/chosen": -0.9227128028869629, "rewards/margins": 0.2798658609390259, "rewards/rejected": -1.2025786638259888, "step": 1880 }, { "epoch": 0.25, "eval_logits/chosen": -2.5783560276031494, "eval_logits/rejected": -2.594203472137451, "eval_logps/chosen": -423.9530944824219, "eval_logps/rejected": -417.4391784667969, "eval_loss": 0.6378757357597351, "eval_rewards/accuracies": 0.628000020980835, "eval_rewards/chosen": -0.9120365977287292, "eval_rewards/margins": 0.2512553036212921, "eval_rewards/rejected": -1.1632920503616333, "eval_runtime": 196.9999, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 1880 }, { "epoch": 0.25, "learning_rate": 4.676957096790536e-06, "logits/chosen": -2.652641773223877, "logits/rejected": -2.653254985809326, "logps/chosen": -421.16961669921875, "logps/rejected": -393.53240966796875, "loss": 0.6376, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8513861894607544, "rewards/margins": 0.23414048552513123, "rewards/rejected": -1.085526704788208, "step": 1890 }, { "epoch": 0.25, "eval_logits/chosen": -2.5858771800994873, "eval_logits/rejected": -2.6016323566436768, "eval_logps/chosen": -427.589111328125, "eval_logps/rejected": -421.1734924316406, "eval_loss": 0.6371917724609375, "eval_rewards/accuracies": 0.6305000185966492, "eval_rewards/chosen": -0.9483969211578369, "eval_rewards/margins": 0.2522384226322174, "eval_rewards/rejected": -1.2006351947784424, "eval_runtime": 196.8632, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.08, "step": 1890 }, { "epoch": 0.25, "learning_rate": 4.671318411098782e-06, "logits/chosen": -2.721386432647705, "logits/rejected": -2.8073456287384033, "logps/chosen": -433.65435791015625, "logps/rejected": -459.4867248535156, "loss": 0.6282, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.8606206178665161, "rewards/margins": 0.3143337070941925, "rewards/rejected": -1.1749542951583862, "step": 1900 }, { "epoch": 0.25, "eval_logits/chosen": -2.5904347896575928, "eval_logits/rejected": -2.606128215789795, "eval_logps/chosen": -426.4163513183594, "eval_logps/rejected": -419.6851806640625, "eval_loss": 0.6361418962478638, "eval_rewards/accuracies": 0.6334999799728394, "eval_rewards/chosen": -0.9366695284843445, "eval_rewards/margins": 0.24908219277858734, "eval_rewards/rejected": -1.1857519149780273, "eval_runtime": 196.9151, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.078, "step": 1900 }, { "epoch": 0.25, "learning_rate": 4.665634399054864e-06, "logits/chosen": -2.705906867980957, "logits/rejected": -2.770385980606079, "logps/chosen": -397.36676025390625, "logps/rejected": -405.4843444824219, "loss": 0.6556, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.9889096021652222, "rewards/margins": 0.21706286072731018, "rewards/rejected": -1.2059725522994995, "step": 1910 }, { "epoch": 0.25, "eval_logits/chosen": -2.5925650596618652, "eval_logits/rejected": -2.608245611190796, "eval_logps/chosen": -424.7822570800781, "eval_logps/rejected": -417.7047424316406, "eval_loss": 0.6357632875442505, "eval_rewards/accuracies": 0.6315000057220459, "eval_rewards/chosen": -0.9203288555145264, "eval_rewards/margins": 0.24561835825443268, "eval_rewards/rejected": -1.1659470796585083, "eval_runtime": 197.5903, "eval_samples_per_second": 10.122, "eval_steps_per_second": 5.061, "step": 1910 }, { "epoch": 0.25, "learning_rate": 4.659905179312743e-06, "logits/chosen": -2.8598313331604004, "logits/rejected": -2.8456664085388184, "logps/chosen": -448.54425048828125, "logps/rejected": -401.2884521484375, "loss": 0.6259, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.866184413433075, "rewards/margins": 0.26407763361930847, "rewards/rejected": -1.130262017250061, "step": 1920 }, { "epoch": 0.25, "eval_logits/chosen": -2.6057279109954834, "eval_logits/rejected": -2.6205661296844482, "eval_logps/chosen": -417.2929382324219, "eval_logps/rejected": -409.1140441894531, "eval_loss": 0.6337299942970276, "eval_rewards/accuracies": 0.6340000033378601, "eval_rewards/chosen": -0.8454354405403137, "eval_rewards/margins": 0.23460477590560913, "eval_rewards/rejected": -1.0800403356552124, "eval_runtime": 197.1043, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.073, "step": 1920 }, { "epoch": 0.25, "learning_rate": 4.654130871470093e-06, "logits/chosen": -2.7806954383850098, "logits/rejected": -2.756470203399658, "logps/chosen": -415.10272216796875, "logps/rejected": -368.8055114746094, "loss": 0.7005, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.8875184059143066, "rewards/margins": 0.07059729844331741, "rewards/rejected": -0.9581157565116882, "step": 1930 }, { "epoch": 0.25, "eval_logits/chosen": -2.6222054958343506, "eval_logits/rejected": -2.636209487915039, "eval_logps/chosen": -413.4906005859375, "eval_logps/rejected": -404.2983703613281, "eval_loss": 0.6328663229942322, "eval_rewards/accuracies": 0.6389999985694885, "eval_rewards/chosen": -0.807411789894104, "eval_rewards/margins": 0.22447140514850616, "eval_rewards/rejected": -1.0318833589553833, "eval_runtime": 197.0477, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 1930 }, { "epoch": 0.25, "learning_rate": 4.6483115960658045e-06, "logits/chosen": -2.877629518508911, "logits/rejected": -2.865546464920044, "logps/chosen": -413.8694763183594, "logps/rejected": -342.9363098144531, "loss": 0.6331, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7345770597457886, "rewards/margins": 0.18642066419124603, "rewards/rejected": -0.9209977388381958, "step": 1940 }, { "epoch": 0.25, "eval_logits/chosen": -2.629322052001953, "eval_logits/rejected": -2.6429662704467773, "eval_logps/chosen": -411.325927734375, "eval_logps/rejected": -401.53961181640625, "eval_loss": 0.6325713992118835, "eval_rewards/accuracies": 0.6349999904632568, "eval_rewards/chosen": -0.7857657074928284, "eval_rewards/margins": 0.21853068470954895, "eval_rewards/rejected": -1.0042963027954102, "eval_runtime": 197.0888, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 1940 }, { "epoch": 0.26, "learning_rate": 4.642447474577466e-06, "logits/chosen": -2.7526779174804688, "logits/rejected": -2.7635135650634766, "logps/chosen": -373.68670654296875, "logps/rejected": -378.1413269042969, "loss": 0.6362, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.799132227897644, "rewards/margins": 0.19670510292053223, "rewards/rejected": -0.9958373308181763, "step": 1950 }, { "epoch": 0.26, "eval_logits/chosen": -2.6297872066497803, "eval_logits/rejected": -2.64349627494812, "eval_logps/chosen": -410.59429931640625, "eval_logps/rejected": -400.9759521484375, "eval_loss": 0.6317591667175293, "eval_rewards/accuracies": 0.637499988079071, "eval_rewards/chosen": -0.7784488201141357, "eval_rewards/margins": 0.22021029889583588, "eval_rewards/rejected": -0.9986591339111328, "eval_runtime": 197.0825, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 1950 }, { "epoch": 0.26, "learning_rate": 4.636538629418832e-06, "logits/chosen": -2.811131715774536, "logits/rejected": -2.8222975730895996, "logps/chosen": -440.174560546875, "logps/rejected": -429.7535095214844, "loss": 0.5862, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.7554556131362915, "rewards/margins": 0.3168772757053375, "rewards/rejected": -1.0723329782485962, "step": 1960 }, { "epoch": 0.26, "eval_logits/chosen": -2.620647668838501, "eval_logits/rejected": -2.634829044342041, "eval_logps/chosen": -415.5986328125, "eval_logps/rejected": -407.06268310546875, "eval_loss": 0.6318819522857666, "eval_rewards/accuracies": 0.6355000138282776, "eval_rewards/chosen": -0.8284925222396851, "eval_rewards/margins": 0.23103398084640503, "eval_rewards/rejected": -1.0595263242721558, "eval_runtime": 197.1465, "eval_samples_per_second": 10.145, "eval_steps_per_second": 5.072, "step": 1960 }, { "epoch": 0.26, "learning_rate": 4.630585183937263e-06, "logits/chosen": -2.806405544281006, "logits/rejected": -2.7973275184631348, "logps/chosen": -413.4725646972656, "logps/rejected": -394.82708740234375, "loss": 0.6907, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.74481201171875, "rewards/margins": 0.07658366113901138, "rewards/rejected": -0.8213956952095032, "step": 1970 }, { "epoch": 0.26, "eval_logits/chosen": -2.615365743637085, "eval_logits/rejected": -2.6301496028900146, "eval_logps/chosen": -410.3633117675781, "eval_logps/rejected": -401.2737731933594, "eval_loss": 0.6315578818321228, "eval_rewards/accuracies": 0.6345000267028809, "eval_rewards/chosen": -0.7761390209197998, "eval_rewards/margins": 0.2254989594221115, "eval_rewards/rejected": -1.0016380548477173, "eval_runtime": 197.0852, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 1970 }, { "epoch": 0.26, "learning_rate": 4.6245872624111535e-06, "logits/chosen": -2.8345344066619873, "logits/rejected": -2.8294196128845215, "logps/chosen": -349.9237060546875, "logps/rejected": -348.3368835449219, "loss": 0.6349, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.6936440467834473, "rewards/margins": 0.2063537836074829, "rewards/rejected": -0.8999978303909302, "step": 1980 }, { "epoch": 0.26, "eval_logits/chosen": -2.6132729053497314, "eval_logits/rejected": -2.6283042430877686, "eval_logps/chosen": -407.8918151855469, "eval_logps/rejected": -398.5971374511719, "eval_loss": 0.6315102577209473, "eval_rewards/accuracies": 0.6330000162124634, "eval_rewards/chosen": -0.751424252986908, "eval_rewards/margins": 0.22344675660133362, "eval_rewards/rejected": -0.9748709797859192, "eval_runtime": 197.0545, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 1980 }, { "epoch": 0.26, "learning_rate": 4.618544990047336e-06, "logits/chosen": -2.8143086433410645, "logits/rejected": -2.787330150604248, "logps/chosen": -453.98297119140625, "logps/rejected": -445.3204040527344, "loss": 0.618, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.781507134437561, "rewards/margins": 0.27939194440841675, "rewards/rejected": -1.060899019241333, "step": 1990 }, { "epoch": 0.26, "eval_logits/chosen": -2.6105549335479736, "eval_logits/rejected": -2.625771999359131, "eval_logps/chosen": -417.1804504394531, "eval_logps/rejected": -409.4155578613281, "eval_loss": 0.6315101385116577, "eval_rewards/accuracies": 0.6414999961853027, "eval_rewards/chosen": -0.8443105220794678, "eval_rewards/margins": 0.23874500393867493, "eval_rewards/rejected": -1.0830554962158203, "eval_runtime": 197.0596, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 1990 }, { "epoch": 0.26, "learning_rate": 4.612458492978473e-06, "logits/chosen": -2.8706493377685547, "logits/rejected": -2.8462719917297363, "logps/chosen": -397.9223327636719, "logps/rejected": -415.9630432128906, "loss": 0.6924, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.9325806498527527, "rewards/margins": 0.136087566614151, "rewards/rejected": -1.0686681270599365, "step": 2000 }, { "epoch": 0.26, "eval_logits/chosen": -2.6107311248779297, "eval_logits/rejected": -2.6258249282836914, "eval_logps/chosen": -417.8204345703125, "eval_logps/rejected": -410.0538330078125, "eval_loss": 0.6316912174224854, "eval_rewards/accuracies": 0.6389999985694885, "eval_rewards/chosen": -0.8507106900215149, "eval_rewards/margins": 0.23872776329517365, "eval_rewards/rejected": -1.0894384384155273, "eval_runtime": 197.0063, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 2000 }, { "epoch": 0.26, "learning_rate": 4.606327898260413e-06, "logits/chosen": -2.686081647872925, "logits/rejected": -2.7080864906311035, "logps/chosen": -447.25384521484375, "logps/rejected": -430.2577209472656, "loss": 0.6461, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.8990098237991333, "rewards/margins": 0.2516574263572693, "rewards/rejected": -1.1506671905517578, "step": 2010 }, { "epoch": 0.26, "eval_logits/chosen": -2.612136125564575, "eval_logits/rejected": -2.62685227394104, "eval_logps/chosen": -412.8479309082031, "eval_logps/rejected": -404.4166259765625, "eval_loss": 0.630695641040802, "eval_rewards/accuracies": 0.640500009059906, "eval_rewards/chosen": -0.8009849786758423, "eval_rewards/margins": 0.2320813089609146, "eval_rewards/rejected": -1.0330662727355957, "eval_runtime": 196.7936, "eval_samples_per_second": 10.163, "eval_steps_per_second": 5.081, "step": 2010 }, { "epoch": 0.26, "learning_rate": 4.600153333869549e-06, "logits/chosen": -2.8086211681365967, "logits/rejected": -2.819854736328125, "logps/chosen": -422.72161865234375, "logps/rejected": -394.31787109375, "loss": 0.6233, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.7718098163604736, "rewards/margins": 0.24191728234291077, "rewards/rejected": -1.013727068901062, "step": 2020 }, { "epoch": 0.26, "eval_logits/chosen": -2.609605073928833, "eval_logits/rejected": -2.624340772628784, "eval_logps/chosen": -409.9208068847656, "eval_logps/rejected": -401.1809997558594, "eval_loss": 0.6306189298629761, "eval_rewards/accuracies": 0.640500009059906, "eval_rewards/chosen": -0.7717139720916748, "eval_rewards/margins": 0.22899581491947174, "eval_rewards/rejected": -1.000709891319275, "eval_runtime": 196.9939, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.076, "step": 2020 }, { "epoch": 0.27, "learning_rate": 4.593934928700141e-06, "logits/chosen": -2.841212749481201, "logits/rejected": -2.8480188846588135, "logps/chosen": -415.624755859375, "logps/rejected": -377.48773193359375, "loss": 0.6237, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.7407634854316711, "rewards/margins": 0.24683237075805664, "rewards/rejected": -0.9875958561897278, "step": 2030 }, { "epoch": 0.27, "eval_logits/chosen": -2.607243299484253, "eval_logits/rejected": -2.6218373775482178, "eval_logps/chosen": -406.3677978515625, "eval_logps/rejected": -397.22369384765625, "eval_loss": 0.630490243434906, "eval_rewards/accuracies": 0.6380000114440918, "eval_rewards/chosen": -0.736184298992157, "eval_rewards/margins": 0.22495214641094208, "eval_rewards/rejected": -0.961136519908905, "eval_runtime": 196.961, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 2030 }, { "epoch": 0.27, "learning_rate": 4.587672812561626e-06, "logits/chosen": -2.81145977973938, "logits/rejected": -2.781007766723633, "logps/chosen": -369.285400390625, "logps/rejected": -425.7210998535156, "loss": 0.5939, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7476687431335449, "rewards/margins": 0.28507062792778015, "rewards/rejected": -1.0327394008636475, "step": 2040 }, { "epoch": 0.27, "eval_logits/chosen": -2.6005775928497314, "eval_logits/rejected": -2.6154563426971436, "eval_logps/chosen": -408.3466796875, "eval_logps/rejected": -399.87847900390625, "eval_loss": 0.6307638883590698, "eval_rewards/accuracies": 0.6384999752044678, "eval_rewards/chosen": -0.7559728622436523, "eval_rewards/margins": 0.23171177506446838, "eval_rewards/rejected": -0.9876845479011536, "eval_runtime": 196.9073, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 2040 }, { "epoch": 0.27, "learning_rate": 4.581367116175911e-06, "logits/chosen": -2.7396187782287598, "logits/rejected": -2.731571912765503, "logps/chosen": -433.5108337402344, "logps/rejected": -405.5694885253906, "loss": 0.5974, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.693037748336792, "rewards/margins": 0.31083375215530396, "rewards/rejected": -1.0038714408874512, "step": 2050 }, { "epoch": 0.27, "eval_logits/chosen": -2.5866856575012207, "eval_logits/rejected": -2.6025893688201904, "eval_logps/chosen": -409.65179443359375, "eval_logps/rejected": -401.7351379394531, "eval_loss": 0.6328474283218384, "eval_rewards/accuracies": 0.6389999985694885, "eval_rewards/chosen": -0.769023597240448, "eval_rewards/margins": 0.2372276335954666, "eval_rewards/rejected": -1.0062512159347534, "eval_runtime": 197.2625, "eval_samples_per_second": 10.139, "eval_steps_per_second": 5.069, "step": 2050 }, { "epoch": 0.27, "learning_rate": 4.5750179711746416e-06, "logits/chosen": -2.7967312335968018, "logits/rejected": -2.7692575454711914, "logps/chosen": -399.40399169921875, "logps/rejected": -404.92596435546875, "loss": 0.6569, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.7823539972305298, "rewards/margins": 0.16327540576457977, "rewards/rejected": -0.9456294178962708, "step": 2060 }, { "epoch": 0.27, "eval_logits/chosen": -2.5818114280700684, "eval_logits/rejected": -2.598083734512329, "eval_logps/chosen": -414.1978759765625, "eval_logps/rejected": -406.9813537597656, "eval_loss": 0.6336009502410889, "eval_rewards/accuracies": 0.6370000243186951, "eval_rewards/chosen": -0.8144845962524414, "eval_rewards/margins": 0.24422858655452728, "eval_rewards/rejected": -1.0587131977081299, "eval_runtime": 196.8703, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.079, "step": 2060 }, { "epoch": 0.27, "learning_rate": 4.5686255100964535e-06, "logits/chosen": -2.845377206802368, "logits/rejected": -2.8053154945373535, "logps/chosen": -410.73785400390625, "logps/rejected": -380.6125183105469, "loss": 0.6322, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.8289012908935547, "rewards/margins": 0.2113029509782791, "rewards/rejected": -1.0402042865753174, "step": 2070 }, { "epoch": 0.27, "eval_logits/chosen": -2.5877645015716553, "eval_logits/rejected": -2.604356527328491, "eval_logps/chosen": -417.6963195800781, "eval_logps/rejected": -411.0251770019531, "eval_loss": 0.6324384212493896, "eval_rewards/accuracies": 0.640999972820282, "eval_rewards/chosen": -0.8494692444801331, "eval_rewards/margins": 0.24968257546424866, "eval_rewards/rejected": -1.0991517305374146, "eval_runtime": 196.9005, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 2070 }, { "epoch": 0.27, "learning_rate": 4.562189866384209e-06, "logits/chosen": -2.691206932067871, "logits/rejected": -2.7267496585845947, "logps/chosen": -375.44580078125, "logps/rejected": -422.0435485839844, "loss": 0.6262, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.8189651370048523, "rewards/margins": 0.2774657607078552, "rewards/rejected": -1.0964308977127075, "step": 2080 }, { "epoch": 0.27, "eval_logits/chosen": -2.592376470565796, "eval_logits/rejected": -2.608642101287842, "eval_logps/chosen": -422.14459228515625, "eval_logps/rejected": -415.8671569824219, "eval_loss": 0.6311394572257996, "eval_rewards/accuracies": 0.6399999856948853, "eval_rewards/chosen": -0.8939514756202698, "eval_rewards/margins": 0.25362005829811096, "eval_rewards/rejected": -1.1475715637207031, "eval_runtime": 197.0619, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 2080 }, { "epoch": 0.27, "learning_rate": 4.555711174382209e-06, "logits/chosen": -2.811758518218994, "logits/rejected": -2.8001110553741455, "logps/chosen": -375.446533203125, "logps/rejected": -360.69464111328125, "loss": 0.6663, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.8602222204208374, "rewards/margins": 0.16579048335552216, "rewards/rejected": -1.026012659072876, "step": 2090 }, { "epoch": 0.27, "eval_logits/chosen": -2.5941474437713623, "eval_logits/rejected": -2.6107828617095947, "eval_logps/chosen": -421.90533447265625, "eval_logps/rejected": -415.3799133300781, "eval_loss": 0.6304261684417725, "eval_rewards/accuracies": 0.6424999833106995, "eval_rewards/chosen": -0.8915589451789856, "eval_rewards/margins": 0.25113990902900696, "eval_rewards/rejected": -1.142698884010315, "eval_runtime": 197.0368, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 2090 }, { "epoch": 0.27, "learning_rate": 4.549189569333387e-06, "logits/chosen": -2.784393787384033, "logits/rejected": -2.711235284805298, "logps/chosen": -375.9978332519531, "logps/rejected": -356.5938415527344, "loss": 0.6222, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.8262361288070679, "rewards/margins": 0.24428649246692657, "rewards/rejected": -1.0705227851867676, "step": 2100 }, { "epoch": 0.27, "eval_logits/chosen": -2.5947983264923096, "eval_logits/rejected": -2.6116442680358887, "eval_logps/chosen": -422.1054382324219, "eval_logps/rejected": -415.39764404296875, "eval_loss": 0.6300971508026123, "eval_rewards/accuracies": 0.6384999752044678, "eval_rewards/chosen": -0.8935604691505432, "eval_rewards/margins": 0.2493157833814621, "eval_rewards/rejected": -1.1428762674331665, "eval_runtime": 196.9277, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 2100 }, { "epoch": 0.28, "learning_rate": 4.542625187376491e-06, "logits/chosen": -2.7952916622161865, "logits/rejected": -2.7755210399627686, "logps/chosen": -446.38494873046875, "logps/rejected": -415.366455078125, "loss": 0.6496, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.8958450555801392, "rewards/margins": 0.19885332882404327, "rewards/rejected": -1.0946983098983765, "step": 2110 }, { "epoch": 0.28, "eval_logits/chosen": -2.593679904937744, "eval_logits/rejected": -2.610772132873535, "eval_logps/chosen": -418.0361022949219, "eval_logps/rejected": -410.80035400390625, "eval_loss": 0.629709005355835, "eval_rewards/accuracies": 0.640999972820282, "eval_rewards/chosen": -0.8528667092323303, "eval_rewards/margins": 0.24403661489486694, "eval_rewards/rejected": -1.0969033241271973, "eval_runtime": 196.9676, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 2110 }, { "epoch": 0.28, "learning_rate": 4.536018165543239e-06, "logits/chosen": -2.8523917198181152, "logits/rejected": -2.8088977336883545, "logps/chosen": -459.11102294921875, "logps/rejected": -462.03546142578125, "loss": 0.6135, "rewards/accuracies": 0.6875, "rewards/chosen": -0.8140150308609009, "rewards/margins": 0.28719818592071533, "rewards/rejected": -1.1012132167816162, "step": 2120 }, { "epoch": 0.28, "eval_logits/chosen": -2.589694023132324, "eval_logits/rejected": -2.606966257095337, "eval_logps/chosen": -416.22003173828125, "eval_logps/rejected": -408.8890380859375, "eval_loss": 0.6295616626739502, "eval_rewards/accuracies": 0.6389999985694885, "eval_rewards/chosen": -0.8347060680389404, "eval_rewards/margins": 0.24308432638645172, "eval_rewards/rejected": -1.0777904987335205, "eval_runtime": 196.8827, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 2120 }, { "epoch": 0.28, "learning_rate": 4.529368641755453e-06, "logits/chosen": -2.8522391319274902, "logits/rejected": -2.889514923095703, "logps/chosen": -359.7933349609375, "logps/rejected": -378.53997802734375, "loss": 0.65, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.868852436542511, "rewards/margins": 0.2264028787612915, "rewards/rejected": -1.0952553749084473, "step": 2130 }, { "epoch": 0.28, "eval_logits/chosen": -2.5772836208343506, "eval_logits/rejected": -2.594741106033325, "eval_logps/chosen": -421.50567626953125, "eval_logps/rejected": -415.184814453125, "eval_loss": 0.630107045173645, "eval_rewards/accuracies": 0.640500009059906, "eval_rewards/chosen": -0.8875633478164673, "eval_rewards/margins": 0.2531849145889282, "eval_rewards/rejected": -1.1407482624053955, "eval_runtime": 196.7983, "eval_samples_per_second": 10.163, "eval_steps_per_second": 5.081, "step": 2130 }, { "epoch": 0.28, "learning_rate": 4.522676754822189e-06, "logits/chosen": -2.7324087619781494, "logits/rejected": -2.6535348892211914, "logps/chosen": -436.89208984375, "logps/rejected": -360.9748229980469, "loss": 0.6562, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.9051302075386047, "rewards/margins": 0.21084150671958923, "rewards/rejected": -1.1159718036651611, "step": 2140 }, { "epoch": 0.28, "eval_logits/chosen": -2.58268666267395, "eval_logits/rejected": -2.5994439125061035, "eval_logps/chosen": -419.2276916503906, "eval_logps/rejected": -412.3636169433594, "eval_loss": 0.6285167932510376, "eval_rewards/accuracies": 0.6464999914169312, "eval_rewards/chosen": -0.8647826910018921, "eval_rewards/margins": 0.2477533221244812, "eval_rewards/rejected": -1.1125361919403076, "eval_runtime": 196.7684, "eval_samples_per_second": 10.164, "eval_steps_per_second": 5.082, "step": 2140 }, { "epoch": 0.28, "learning_rate": 4.515942644436836e-06, "logits/chosen": -2.78916597366333, "logits/rejected": -2.79569673538208, "logps/chosen": -430.1502990722656, "logps/rejected": -427.21038818359375, "loss": 0.5989, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.8669744729995728, "rewards/margins": 0.3714192509651184, "rewards/rejected": -1.238393783569336, "step": 2150 }, { "epoch": 0.28, "eval_logits/chosen": -2.5814082622528076, "eval_logits/rejected": -2.597965955734253, "eval_logps/chosen": -421.9512634277344, "eval_logps/rejected": -415.35882568359375, "eval_loss": 0.6280709505081177, "eval_rewards/accuracies": 0.6474999785423279, "eval_rewards/chosen": -0.8920185565948486, "eval_rewards/margins": 0.25046926736831665, "eval_rewards/rejected": -1.1424877643585205, "eval_runtime": 196.8996, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 2150 }, { "epoch": 0.28, "learning_rate": 4.509166451174194e-06, "logits/chosen": -2.8253769874572754, "logits/rejected": -2.824777364730835, "logps/chosen": -454.80169677734375, "logps/rejected": -447.1356506347656, "loss": 0.6232, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.8632291555404663, "rewards/margins": 0.23953184485435486, "rewards/rejected": -1.1027610301971436, "step": 2160 }, { "epoch": 0.28, "eval_logits/chosen": -2.5811665058135986, "eval_logits/rejected": -2.5974154472351074, "eval_logps/chosen": -426.8110656738281, "eval_logps/rejected": -420.6875305175781, "eval_loss": 0.62840735912323, "eval_rewards/accuracies": 0.6460000276565552, "eval_rewards/chosen": -0.9406165480613708, "eval_rewards/margins": 0.25515857338905334, "eval_rewards/rejected": -1.1957751512527466, "eval_runtime": 196.7753, "eval_samples_per_second": 10.164, "eval_steps_per_second": 5.082, "step": 2160 }, { "epoch": 0.28, "learning_rate": 4.502348316487552e-06, "logits/chosen": -2.7800397872924805, "logits/rejected": -2.74601411819458, "logps/chosen": -441.43670654296875, "logps/rejected": -417.4474182128906, "loss": 0.6596, "rewards/accuracies": 0.625, "rewards/chosen": -1.0352487564086914, "rewards/margins": 0.19729386270046234, "rewards/rejected": -1.2325425148010254, "step": 2170 }, { "epoch": 0.28, "eval_logits/chosen": -2.584304094314575, "eval_logits/rejected": -2.5999248027801514, "eval_logps/chosen": -424.75494384765625, "eval_logps/rejected": -418.04986572265625, "eval_loss": 0.6277941465377808, "eval_rewards/accuracies": 0.6464999914169312, "eval_rewards/chosen": -0.9200555086135864, "eval_rewards/margins": 0.24934299290180206, "eval_rewards/rejected": -1.169398546218872, "eval_runtime": 196.9617, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 2170 }, { "epoch": 0.29, "learning_rate": 4.495488382705722e-06, "logits/chosen": -2.776062488555908, "logits/rejected": -2.755868434906006, "logps/chosen": -491.7884216308594, "logps/rejected": -413.3304138183594, "loss": 0.5964, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.7972935438156128, "rewards/margins": 0.30835580825805664, "rewards/rejected": -1.105649471282959, "step": 2180 }, { "epoch": 0.29, "eval_logits/chosen": -2.5931167602539062, "eval_logits/rejected": -2.6081583499908447, "eval_logps/chosen": -418.2218933105469, "eval_logps/rejected": -410.45379638671875, "eval_loss": 0.6268242597579956, "eval_rewards/accuracies": 0.6514999866485596, "eval_rewards/chosen": -0.8547250032424927, "eval_rewards/margins": 0.23871254920959473, "eval_rewards/rejected": -1.0934375524520874, "eval_runtime": 196.9069, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 2180 }, { "epoch": 0.29, "learning_rate": 4.488586793030075e-06, "logits/chosen": -2.7607836723327637, "logits/rejected": -2.716301441192627, "logps/chosen": -357.68621826171875, "logps/rejected": -410.98468017578125, "loss": 0.5492, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.7945287227630615, "rewards/margins": 0.4202180802822113, "rewards/rejected": -1.2147467136383057, "step": 2190 }, { "epoch": 0.29, "eval_logits/chosen": -2.591677188873291, "eval_logits/rejected": -2.6068708896636963, "eval_logps/chosen": -417.1944274902344, "eval_logps/rejected": -409.63494873046875, "eval_loss": 0.6265187859535217, "eval_rewards/accuracies": 0.6514999866485596, "eval_rewards/chosen": -0.8444498181343079, "eval_rewards/margins": 0.2407991737127304, "eval_rewards/rejected": -1.0852489471435547, "eval_runtime": 196.95, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 2190 }, { "epoch": 0.29, "learning_rate": 4.481643691531551e-06, "logits/chosen": -2.8239293098449707, "logits/rejected": -2.846830368041992, "logps/chosen": -403.1731872558594, "logps/rejected": -379.99462890625, "loss": 0.6317, "rewards/accuracies": 0.625, "rewards/chosen": -0.7410529851913452, "rewards/margins": 0.24859830737113953, "rewards/rejected": -0.9896513223648071, "step": 2200 }, { "epoch": 0.29, "eval_logits/chosen": -2.5892865657806396, "eval_logits/rejected": -2.6051228046417236, "eval_logps/chosen": -413.1067199707031, "eval_logps/rejected": -405.3682556152344, "eval_loss": 0.6259841322898865, "eval_rewards/accuracies": 0.6520000100135803, "eval_rewards/chosen": -0.8035732507705688, "eval_rewards/margins": 0.2390093058347702, "eval_rewards/rejected": -1.0425825119018555, "eval_runtime": 197.1506, "eval_samples_per_second": 10.145, "eval_steps_per_second": 5.072, "step": 2200 }, { "epoch": 0.29, "learning_rate": 4.474659223147652e-06, "logits/chosen": -2.813742160797119, "logits/rejected": -2.821537494659424, "logps/chosen": -422.7051696777344, "logps/rejected": -411.17791748046875, "loss": 0.6286, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.8256515264511108, "rewards/margins": 0.2547362744808197, "rewards/rejected": -1.080387830734253, "step": 2210 }, { "epoch": 0.29, "eval_logits/chosen": -2.582897663116455, "eval_logits/rejected": -2.5993919372558594, "eval_logps/chosen": -413.4576110839844, "eval_logps/rejected": -406.21124267578125, "eval_loss": 0.625976026058197, "eval_rewards/accuracies": 0.6445000171661377, "eval_rewards/chosen": -0.8070821166038513, "eval_rewards/margins": 0.243929922580719, "eval_rewards/rejected": -1.0510119199752808, "eval_runtime": 196.7712, "eval_samples_per_second": 10.164, "eval_steps_per_second": 5.082, "step": 2210 }, { "epoch": 0.29, "learning_rate": 4.4676335336794125e-06, "logits/chosen": -2.7268691062927246, "logits/rejected": -2.7420523166656494, "logps/chosen": -458.7037048339844, "logps/rejected": -435.62042236328125, "loss": 0.6342, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.8187200427055359, "rewards/margins": 0.21425040066242218, "rewards/rejected": -1.0329705476760864, "step": 2220 }, { "epoch": 0.29, "eval_logits/chosen": -2.5829458236694336, "eval_logits/rejected": -2.5995917320251465, "eval_logps/chosen": -415.4107360839844, "eval_logps/rejected": -408.4854736328125, "eval_loss": 0.6260092258453369, "eval_rewards/accuracies": 0.6464999914169312, "eval_rewards/chosen": -0.8266136646270752, "eval_rewards/margins": 0.24714109301567078, "eval_rewards/rejected": -1.0737547874450684, "eval_runtime": 196.9414, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.078, "step": 2220 }, { "epoch": 0.29, "learning_rate": 4.46056676978836e-06, "logits/chosen": -2.761662006378174, "logits/rejected": -2.779341697692871, "logps/chosen": -397.09051513671875, "logps/rejected": -458.64166259765625, "loss": 0.6217, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.7701650857925415, "rewards/margins": 0.2514593005180359, "rewards/rejected": -1.0216243267059326, "step": 2230 }, { "epoch": 0.29, "eval_logits/chosen": -2.5795228481292725, "eval_logits/rejected": -2.596259117126465, "eval_logps/chosen": -421.1188049316406, "eval_logps/rejected": -415.04644775390625, "eval_loss": 0.6261369585990906, "eval_rewards/accuracies": 0.6464999914169312, "eval_rewards/chosen": -0.8836943507194519, "eval_rewards/margins": 0.25567007064819336, "eval_rewards/rejected": -1.139364242553711, "eval_runtime": 197.2042, "eval_samples_per_second": 10.142, "eval_steps_per_second": 5.071, "step": 2230 }, { "epoch": 0.29, "learning_rate": 4.453459078993453e-06, "logits/chosen": -2.692732334136963, "logits/rejected": -2.79284930229187, "logps/chosen": -395.21856689453125, "logps/rejected": -419.09454345703125, "loss": 0.5913, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.8348531723022461, "rewards/margins": 0.3056022524833679, "rewards/rejected": -1.1404553651809692, "step": 2240 }, { "epoch": 0.29, "eval_logits/chosen": -2.570924758911133, "eval_logits/rejected": -2.58809757232666, "eval_logps/chosen": -424.5317687988281, "eval_logps/rejected": -419.3282470703125, "eval_loss": 0.6271562576293945, "eval_rewards/accuracies": 0.6430000066757202, "eval_rewards/chosen": -0.9178237915039062, "eval_rewards/margins": 0.26435843110084534, "eval_rewards/rejected": -1.1821821928024292, "eval_runtime": 196.9415, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.078, "step": 2240 }, { "epoch": 0.29, "learning_rate": 4.446310609668001e-06, "logits/chosen": -2.659118413925171, "logits/rejected": -2.699690341949463, "logps/chosen": -386.1409606933594, "logps/rejected": -446.634033203125, "loss": 0.6624, "rewards/accuracies": 0.625, "rewards/chosen": -0.9850989580154419, "rewards/margins": 0.194298654794693, "rewards/rejected": -1.1793975830078125, "step": 2250 }, { "epoch": 0.29, "eval_logits/chosen": -2.570498466491699, "eval_logits/rejected": -2.5880205631256104, "eval_logps/chosen": -425.5768127441406, "eval_logps/rejected": -420.7292785644531, "eval_loss": 0.6279781460762024, "eval_rewards/accuracies": 0.6420000195503235, "eval_rewards/chosen": -0.9282740354537964, "eval_rewards/margins": 0.26791858673095703, "eval_rewards/rejected": -1.1961926221847534, "eval_runtime": 196.9759, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 2250 }, { "epoch": 0.3, "learning_rate": 4.439121511036562e-06, "logits/chosen": -2.758730411529541, "logits/rejected": -2.7282633781433105, "logps/chosen": -440.101318359375, "logps/rejected": -413.44891357421875, "loss": 0.6178, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.8409091234207153, "rewards/margins": 0.31042739748954773, "rewards/rejected": -1.151336431503296, "step": 2260 }, { "epoch": 0.3, "eval_logits/chosen": -2.575878620147705, "eval_logits/rejected": -2.5933985710144043, "eval_logps/chosen": -420.9499816894531, "eval_logps/rejected": -415.45281982421875, "eval_loss": 0.628210186958313, "eval_rewards/accuracies": 0.640999972820282, "eval_rewards/chosen": -0.8820055723190308, "eval_rewards/margins": 0.26142239570617676, "eval_rewards/rejected": -1.143427848815918, "eval_runtime": 196.9298, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 2260 }, { "epoch": 0.3, "learning_rate": 4.431891933171839e-06, "logits/chosen": -2.731050968170166, "logits/rejected": -2.701270580291748, "logps/chosen": -414.69268798828125, "logps/rejected": -407.79150390625, "loss": 0.6685, "rewards/accuracies": 0.625, "rewards/chosen": -0.8784014582633972, "rewards/margins": 0.18701156973838806, "rewards/rejected": -1.0654131174087524, "step": 2270 }, { "epoch": 0.3, "eval_logits/chosen": -2.5852274894714355, "eval_logits/rejected": -2.6019883155822754, "eval_logps/chosen": -421.2300109863281, "eval_logps/rejected": -415.59075927734375, "eval_loss": 0.6269444823265076, "eval_rewards/accuracies": 0.6455000042915344, "eval_rewards/chosen": -0.8848059773445129, "eval_rewards/margins": 0.2600012421607971, "eval_rewards/rejected": -1.14480721950531, "eval_runtime": 196.8549, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 2270 }, { "epoch": 0.3, "learning_rate": 4.424622026991536e-06, "logits/chosen": -2.7388529777526855, "logits/rejected": -2.7334494590759277, "logps/chosen": -420.9461975097656, "logps/rejected": -408.1824951171875, "loss": 0.6301, "rewards/accuracies": 0.625, "rewards/chosen": -0.8830841183662415, "rewards/margins": 0.2399568259716034, "rewards/rejected": -1.1230409145355225, "step": 2280 }, { "epoch": 0.3, "eval_logits/chosen": -2.5922598838806152, "eval_logits/rejected": -2.60862398147583, "eval_logps/chosen": -421.9366760253906, "eval_logps/rejected": -416.2523193359375, "eval_loss": 0.6262630224227905, "eval_rewards/accuracies": 0.6474999785423279, "eval_rewards/chosen": -0.891872763633728, "eval_rewards/margins": 0.2595498561859131, "eval_rewards/rejected": -1.1514227390289307, "eval_runtime": 196.8689, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.08, "step": 2280 }, { "epoch": 0.3, "learning_rate": 4.417311944255215e-06, "logits/chosen": -2.8399720191955566, "logits/rejected": -2.8531653881073, "logps/chosen": -379.5211181640625, "logps/rejected": -424.2601623535156, "loss": 0.6861, "rewards/accuracies": 0.5625, "rewards/chosen": -0.863876461982727, "rewards/margins": 0.12213647365570068, "rewards/rejected": -0.9860130548477173, "step": 2290 }, { "epoch": 0.3, "eval_logits/chosen": -2.590639352798462, "eval_logits/rejected": -2.6069109439849854, "eval_logps/chosen": -423.515625, "eval_logps/rejected": -418.02130126953125, "eval_loss": 0.6258890628814697, "eval_rewards/accuracies": 0.6495000123977661, "eval_rewards/chosen": -0.9076623320579529, "eval_rewards/margins": 0.26145049929618835, "eval_rewards/rejected": -1.1691128015518188, "eval_runtime": 197.0851, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 2290 }, { "epoch": 0.3, "learning_rate": 4.409961837561122e-06, "logits/chosen": -2.789848804473877, "logits/rejected": -2.7216029167175293, "logps/chosen": -463.98345947265625, "logps/rejected": -494.39251708984375, "loss": 0.6088, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9309770464897156, "rewards/margins": 0.31190377473831177, "rewards/rejected": -1.2428807020187378, "step": 2300 }, { "epoch": 0.3, "eval_logits/chosen": -2.5774741172790527, "eval_logits/rejected": -2.5942113399505615, "eval_logps/chosen": -424.3768005371094, "eval_logps/rejected": -419.164306640625, "eval_loss": 0.6265602707862854, "eval_rewards/accuracies": 0.6460000276565552, "eval_rewards/chosen": -0.9162741899490356, "eval_rewards/margins": 0.2642686367034912, "eval_rewards/rejected": -1.1805428266525269, "eval_runtime": 196.9992, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 2300 }, { "epoch": 0.3, "learning_rate": 4.402571860343006e-06, "logits/chosen": -2.7374491691589355, "logits/rejected": -2.7134087085723877, "logps/chosen": -429.6944885253906, "logps/rejected": -379.41595458984375, "loss": 0.6074, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.7847088575363159, "rewards/margins": 0.2951991558074951, "rewards/rejected": -1.0799081325531006, "step": 2310 }, { "epoch": 0.3, "eval_logits/chosen": -2.5742263793945312, "eval_logits/rejected": -2.591237783432007, "eval_logps/chosen": -419.9903259277344, "eval_logps/rejected": -414.24224853515625, "eval_loss": 0.6268322467803955, "eval_rewards/accuracies": 0.6399999856948853, "eval_rewards/chosen": -0.8724092841148376, "eval_rewards/margins": 0.2589130699634552, "eval_rewards/rejected": -1.1313222646713257, "eval_runtime": 196.9684, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 2310 }, { "epoch": 0.3, "learning_rate": 4.3951421668669165e-06, "logits/chosen": -2.7886240482330322, "logits/rejected": -2.7811214923858643, "logps/chosen": -431.88958740234375, "logps/rejected": -444.73175048828125, "loss": 0.5575, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.8605194091796875, "rewards/margins": 0.41099271178245544, "rewards/rejected": -1.2715120315551758, "step": 2320 }, { "epoch": 0.3, "eval_logits/chosen": -2.567586660385132, "eval_logits/rejected": -2.5854969024658203, "eval_logps/chosen": -426.3576354980469, "eval_logps/rejected": -421.8108215332031, "eval_loss": 0.6284373998641968, "eval_rewards/accuracies": 0.6434999704360962, "eval_rewards/chosen": -0.9360825419425964, "eval_rewards/margins": 0.27092528343200684, "eval_rewards/rejected": -1.2070077657699585, "eval_runtime": 196.778, "eval_samples_per_second": 10.164, "eval_steps_per_second": 5.082, "step": 2320 }, { "epoch": 0.3, "learning_rate": 4.3876729122279784e-06, "logits/chosen": -2.776318073272705, "logits/rejected": -2.809359312057495, "logps/chosen": -338.4325256347656, "logps/rejected": -368.7469177246094, "loss": 0.5775, "rewards/accuracies": 0.625, "rewards/chosen": -0.8960781097412109, "rewards/margins": 0.3791848123073578, "rewards/rejected": -1.2752629518508911, "step": 2330 }, { "epoch": 0.3, "eval_logits/chosen": -2.5665230751037598, "eval_logits/rejected": -2.584770679473877, "eval_logps/chosen": -433.9892578125, "eval_logps/rejected": -430.6888122558594, "eval_loss": 0.6298844814300537, "eval_rewards/accuracies": 0.6420000195503235, "eval_rewards/chosen": -1.01239812374115, "eval_rewards/margins": 0.28338971734046936, "eval_rewards/rejected": -1.2957879304885864, "eval_runtime": 196.7239, "eval_samples_per_second": 10.167, "eval_steps_per_second": 5.083, "step": 2330 }, { "epoch": 0.31, "learning_rate": 4.3801642523471585e-06, "logits/chosen": -2.8114333152770996, "logits/rejected": -2.7745885848999023, "logps/chosen": -434.7900390625, "logps/rejected": -414.8701171875, "loss": 0.5663, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9342101812362671, "rewards/margins": 0.3892834484577179, "rewards/rejected": -1.3234935998916626, "step": 2340 }, { "epoch": 0.31, "eval_logits/chosen": -2.5688867568969727, "eval_logits/rejected": -2.5871498584747314, "eval_logps/chosen": -436.9039306640625, "eval_logps/rejected": -434.2275695800781, "eval_loss": 0.6300011277198792, "eval_rewards/accuracies": 0.6414999961853027, "eval_rewards/chosen": -1.041544795036316, "eval_rewards/margins": 0.2896304726600647, "eval_rewards/rejected": -1.3311753273010254, "eval_runtime": 196.9366, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 2340 }, { "epoch": 0.31, "learning_rate": 4.37261634396801e-06, "logits/chosen": -2.6677405834198, "logits/rejected": -2.6676297187805176, "logps/chosen": -426.80712890625, "logps/rejected": -432.39813232421875, "loss": 0.5976, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0480899810791016, "rewards/margins": 0.3257550299167633, "rewards/rejected": -1.3738449811935425, "step": 2350 }, { "epoch": 0.31, "eval_logits/chosen": -2.5646708011627197, "eval_logits/rejected": -2.582854747772217, "eval_logps/chosen": -435.26544189453125, "eval_logps/rejected": -432.55438232421875, "eval_loss": 0.6303899884223938, "eval_rewards/accuracies": 0.6395000219345093, "eval_rewards/chosen": -1.025160312652588, "eval_rewards/margins": 0.28928351402282715, "eval_rewards/rejected": -1.3144437074661255, "eval_runtime": 196.9692, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 2350 }, { "epoch": 0.31, "learning_rate": 4.365029344653401e-06, "logits/chosen": -2.7826085090637207, "logits/rejected": -2.7922708988189697, "logps/chosen": -518.70361328125, "logps/rejected": -454.2701721191406, "loss": 0.6032, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0072052478790283, "rewards/margins": 0.377673864364624, "rewards/rejected": -1.3848788738250732, "step": 2360 }, { "epoch": 0.31, "eval_logits/chosen": -2.564948558807373, "eval_logits/rejected": -2.582928419113159, "eval_logps/chosen": -434.41937255859375, "eval_logps/rejected": -431.6936950683594, "eval_loss": 0.6297749876976013, "eval_rewards/accuracies": 0.6395000219345093, "eval_rewards/chosen": -1.0167001485824585, "eval_rewards/margins": 0.28913629055023193, "eval_rewards/rejected": -1.3058364391326904, "eval_runtime": 196.9346, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 2360 }, { "epoch": 0.31, "learning_rate": 4.35740341278222e-06, "logits/chosen": -2.785799264907837, "logits/rejected": -2.825850009918213, "logps/chosen": -504.35968017578125, "logps/rejected": -487.5384216308594, "loss": 0.6569, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.952163577079773, "rewards/margins": 0.20112566649913788, "rewards/rejected": -1.1532893180847168, "step": 2370 }, { "epoch": 0.31, "eval_logits/chosen": -2.565399169921875, "eval_logits/rejected": -2.583078384399414, "eval_logps/chosen": -431.9676208496094, "eval_logps/rejected": -428.8084716796875, "eval_loss": 0.6288526654243469, "eval_rewards/accuracies": 0.640999972820282, "eval_rewards/chosen": -0.992182195186615, "eval_rewards/margins": 0.2848021686077118, "eval_rewards/rejected": -1.2769843339920044, "eval_runtime": 197.1699, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 2370 }, { "epoch": 0.31, "learning_rate": 4.349738707546079e-06, "logits/chosen": -2.6712303161621094, "logits/rejected": -2.681317090988159, "logps/chosen": -432.171630859375, "logps/rejected": -398.49884033203125, "loss": 0.6557, "rewards/accuracies": 0.5625, "rewards/chosen": -0.9553629755973816, "rewards/margins": 0.19942878186702728, "rewards/rejected": -1.1547917127609253, "step": 2380 }, { "epoch": 0.31, "eval_logits/chosen": -2.5676705837249756, "eval_logits/rejected": -2.5848608016967773, "eval_logps/chosen": -434.3564453125, "eval_logps/rejected": -431.3999328613281, "eval_loss": 0.6287895441055298, "eval_rewards/accuracies": 0.640999972820282, "eval_rewards/chosen": -1.0160703659057617, "eval_rewards/margins": 0.28682854771614075, "eval_rewards/rejected": -1.3028990030288696, "eval_runtime": 196.8905, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 2380 }, { "epoch": 0.31, "learning_rate": 4.3420353889459835e-06, "logits/chosen": -2.835454225540161, "logits/rejected": -2.818660259246826, "logps/chosen": -486.8052673339844, "logps/rejected": -451.1253967285156, "loss": 0.593, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9559980630874634, "rewards/margins": 0.37571167945861816, "rewards/rejected": -1.331709623336792, "step": 2390 }, { "epoch": 0.31, "eval_logits/chosen": -2.5628139972686768, "eval_logits/rejected": -2.579824686050415, "eval_logps/chosen": -436.363525390625, "eval_logps/rejected": -433.6669921875, "eval_loss": 0.62941575050354, "eval_rewards/accuracies": 0.6414999961853027, "eval_rewards/chosen": -1.0361416339874268, "eval_rewards/margins": 0.2894286513328552, "eval_rewards/rejected": -1.3255702257156372, "eval_runtime": 197.1764, "eval_samples_per_second": 10.143, "eval_steps_per_second": 5.072, "step": 2390 }, { "epoch": 0.31, "learning_rate": 4.334293617788992e-06, "logits/chosen": -2.8445041179656982, "logits/rejected": -2.79730486869812, "logps/chosen": -416.4369201660156, "logps/rejected": -369.0491638183594, "loss": 0.5738, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.0192604064941406, "rewards/margins": 0.47983551025390625, "rewards/rejected": -1.4990959167480469, "step": 2400 }, { "epoch": 0.31, "eval_logits/chosen": -2.565260171890259, "eval_logits/rejected": -2.5818259716033936, "eval_logps/chosen": -431.8421325683594, "eval_logps/rejected": -428.3226623535156, "eval_loss": 0.628280520439148, "eval_rewards/accuracies": 0.6395000219345093, "eval_rewards/chosen": -0.9909270405769348, "eval_rewards/margins": 0.2811991274356842, "eval_rewards/rejected": -1.272126317024231, "eval_runtime": 196.9197, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 2400 }, { "epoch": 0.32, "learning_rate": 4.326513555684867e-06, "logits/chosen": -2.804062843322754, "logits/rejected": -2.7835028171539307, "logps/chosen": -459.07330322265625, "logps/rejected": -400.3332214355469, "loss": 0.6016, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8659582138061523, "rewards/margins": 0.28941652178764343, "rewards/rejected": -1.1553747653961182, "step": 2410 }, { "epoch": 0.32, "eval_logits/chosen": -2.569629430770874, "eval_logits/rejected": -2.5860977172851562, "eval_logps/chosen": -427.0762634277344, "eval_logps/rejected": -422.9002380371094, "eval_loss": 0.6277644038200378, "eval_rewards/accuracies": 0.6380000114440918, "eval_rewards/chosen": -0.9432685375213623, "eval_rewards/margins": 0.27463406324386597, "eval_rewards/rejected": -1.2179025411605835, "eval_runtime": 196.9404, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.078, "step": 2410 }, { "epoch": 0.32, "learning_rate": 4.31869536504269e-06, "logits/chosen": -2.7398853302001953, "logits/rejected": -2.775299310684204, "logps/chosen": -407.65118408203125, "logps/rejected": -421.64093017578125, "loss": 0.5889, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.9197827577590942, "rewards/margins": 0.32822954654693604, "rewards/rejected": -1.2480123043060303, "step": 2420 }, { "epoch": 0.32, "eval_logits/chosen": -2.556795835494995, "eval_logits/rejected": -2.5738165378570557, "eval_logps/chosen": -426.9832458496094, "eval_logps/rejected": -422.9380187988281, "eval_loss": 0.6303883194923401, "eval_rewards/accuracies": 0.6345000267028809, "eval_rewards/chosen": -0.9423384070396423, "eval_rewards/margins": 0.27594175934791565, "eval_rewards/rejected": -1.2182801961898804, "eval_runtime": 196.9091, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.078, "step": 2420 }, { "epoch": 0.32, "learning_rate": 4.310839209067482e-06, "logits/chosen": -2.842728853225708, "logits/rejected": -2.793224334716797, "logps/chosen": -423.9481506347656, "logps/rejected": -408.9284362792969, "loss": 0.6591, "rewards/accuracies": 0.5625, "rewards/chosen": -0.9646501541137695, "rewards/margins": 0.16199491918087006, "rewards/rejected": -1.1266452074050903, "step": 2430 }, { "epoch": 0.32, "eval_logits/chosen": -2.5522592067718506, "eval_logits/rejected": -2.5687339305877686, "eval_logps/chosen": -426.2090759277344, "eval_logps/rejected": -422.0822448730469, "eval_loss": 0.6298808455467224, "eval_rewards/accuracies": 0.6324999928474426, "eval_rewards/chosen": -0.9345968961715698, "eval_rewards/margins": 0.2751254737377167, "eval_rewards/rejected": -1.2097221612930298, "eval_runtime": 197.0417, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 2430 }, { "epoch": 0.32, "learning_rate": 4.302945251756788e-06, "logits/chosen": -2.7333877086639404, "logits/rejected": -2.7457363605499268, "logps/chosen": -420.790283203125, "logps/rejected": -406.7698669433594, "loss": 0.5945, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.9372466802597046, "rewards/margins": 0.35772770643234253, "rewards/rejected": -1.294974446296692, "step": 2440 }, { "epoch": 0.32, "eval_logits/chosen": -2.5549113750457764, "eval_logits/rejected": -2.571284294128418, "eval_logps/chosen": -422.33050537109375, "eval_logps/rejected": -417.47723388671875, "eval_loss": 0.6298490166664124, "eval_rewards/accuracies": 0.6349999904632568, "eval_rewards/chosen": -0.8958110213279724, "eval_rewards/margins": 0.26786088943481445, "eval_rewards/rejected": -1.1636719703674316, "eval_runtime": 197.0623, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 2440 }, { "epoch": 0.32, "learning_rate": 4.29501365789726e-06, "logits/chosen": -2.744837522506714, "logits/rejected": -2.7025675773620605, "logps/chosen": -375.1982727050781, "logps/rejected": -369.05279541015625, "loss": 0.6374, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9093378186225891, "rewards/margins": 0.3083241879940033, "rewards/rejected": -1.2176620960235596, "step": 2450 }, { "epoch": 0.32, "eval_logits/chosen": -2.5562515258789062, "eval_logits/rejected": -2.572701930999756, "eval_logps/chosen": -420.1925964355469, "eval_logps/rejected": -415.2203063964844, "eval_loss": 0.6289076805114746, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": -0.8744320273399353, "eval_rewards/margins": 0.26667073369026184, "eval_rewards/rejected": -1.1411027908325195, "eval_runtime": 196.7816, "eval_samples_per_second": 10.164, "eval_steps_per_second": 5.082, "step": 2450 }, { "epoch": 0.32, "learning_rate": 4.2870445930612135e-06, "logits/chosen": -2.7384285926818848, "logits/rejected": -2.7128889560699463, "logps/chosen": -462.66632080078125, "logps/rejected": -456.52777099609375, "loss": 0.5373, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.7665729522705078, "rewards/margins": 0.485451877117157, "rewards/rejected": -1.2520248889923096, "step": 2460 }, { "epoch": 0.32, "eval_logits/chosen": -2.5484607219696045, "eval_logits/rejected": -2.56520676612854, "eval_logps/chosen": -422.5688781738281, "eval_logps/rejected": -417.9743347167969, "eval_loss": 0.6304011344909668, "eval_rewards/accuracies": 0.6324999928474426, "eval_rewards/chosen": -0.8981947302818298, "eval_rewards/margins": 0.2704484164714813, "eval_rewards/rejected": -1.1686433553695679, "eval_runtime": 196.9165, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.078, "step": 2460 }, { "epoch": 0.32, "learning_rate": 4.279038223603171e-06, "logits/chosen": -2.7502496242523193, "logits/rejected": -2.770395278930664, "logps/chosen": -421.0673828125, "logps/rejected": -402.5542907714844, "loss": 0.5907, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8335832357406616, "rewards/margins": 0.3773055672645569, "rewards/rejected": -1.2108887434005737, "step": 2470 }, { "epoch": 0.32, "eval_logits/chosen": -2.532897472381592, "eval_logits/rejected": -2.5502543449401855, "eval_logps/chosen": -432.739013671875, "eval_logps/rejected": -429.7646179199219, "eval_loss": 0.632610559463501, "eval_rewards/accuracies": 0.6309999823570251, "eval_rewards/chosen": -0.9998957514762878, "eval_rewards/margins": 0.28665024042129517, "eval_rewards/rejected": -1.2865458726882935, "eval_runtime": 196.9374, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 2470 }, { "epoch": 0.32, "learning_rate": 4.2709947166563906e-06, "logits/chosen": -2.638233184814453, "logits/rejected": -2.607182025909424, "logps/chosen": -435.4264221191406, "logps/rejected": -462.9147033691406, "loss": 0.611, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0771570205688477, "rewards/margins": 0.339433491230011, "rewards/rejected": -1.4165904521942139, "step": 2480 }, { "epoch": 0.32, "eval_logits/chosen": -2.5232503414154053, "eval_logits/rejected": -2.540679693222046, "eval_logps/chosen": -433.52130126953125, "eval_logps/rejected": -430.7619323730469, "eval_loss": 0.6332414746284485, "eval_rewards/accuracies": 0.6340000033378601, "eval_rewards/chosen": -1.0077186822891235, "eval_rewards/margins": 0.2888000011444092, "eval_rewards/rejected": -1.2965186834335327, "eval_runtime": 196.9153, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.078, "step": 2480 }, { "epoch": 0.33, "learning_rate": 4.262914240129379e-06, "logits/chosen": -2.7348380088806152, "logits/rejected": -2.7146236896514893, "logps/chosen": -457.7591247558594, "logps/rejected": -439.1285705566406, "loss": 0.6033, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9677481651306152, "rewards/margins": 0.43764448165893555, "rewards/rejected": -1.4053925275802612, "step": 2490 }, { "epoch": 0.33, "eval_logits/chosen": -2.5245630741119385, "eval_logits/rejected": -2.5417044162750244, "eval_logps/chosen": -431.026123046875, "eval_logps/rejected": -427.7772216796875, "eval_loss": 0.6321043968200684, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": -0.9827673435211182, "eval_rewards/margins": 0.28390470147132874, "eval_rewards/rejected": -1.266672134399414, "eval_runtime": 196.7971, "eval_samples_per_second": 10.163, "eval_steps_per_second": 5.081, "step": 2490 }, { "epoch": 0.33, "learning_rate": 4.254796962702382e-06, "logits/chosen": -2.7546756267547607, "logits/rejected": -2.7376418113708496, "logps/chosen": -446.4517517089844, "logps/rejected": -444.3236389160156, "loss": 0.6122, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.8910658955574036, "rewards/margins": 0.300523579120636, "rewards/rejected": -1.191589593887329, "step": 2500 }, { "epoch": 0.33, "eval_logits/chosen": -2.5351521968841553, "eval_logits/rejected": -2.551602602005005, "eval_logps/chosen": -429.4399108886719, "eval_logps/rejected": -425.7723693847656, "eval_loss": 0.6307942867279053, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": -0.966904878616333, "eval_rewards/margins": 0.27971866726875305, "eval_rewards/rejected": -1.2466236352920532, "eval_runtime": 197.2545, "eval_samples_per_second": 10.139, "eval_steps_per_second": 5.07, "step": 2500 }, { "epoch": 0.33, "learning_rate": 4.246643053823864e-06, "logits/chosen": -2.7471210956573486, "logits/rejected": -2.7411389350891113, "logps/chosen": -359.96807861328125, "logps/rejected": -394.1663818359375, "loss": 0.6077, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.8434604406356812, "rewards/margins": 0.3051786720752716, "rewards/rejected": -1.1486390829086304, "step": 2510 }, { "epoch": 0.33, "eval_logits/chosen": -2.548297882080078, "eval_logits/rejected": -2.5639851093292236, "eval_logps/chosen": -429.1539306640625, "eval_logps/rejected": -425.4289855957031, "eval_loss": 0.628643810749054, "eval_rewards/accuracies": 0.640500009059906, "eval_rewards/chosen": -0.9640450477600098, "eval_rewards/margins": 0.27914461493492126, "eval_rewards/rejected": -1.2431896924972534, "eval_runtime": 197.0617, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 2510 }, { "epoch": 0.33, "learning_rate": 4.238452683706979e-06, "logits/chosen": -2.7691006660461426, "logits/rejected": -2.7818400859832764, "logps/chosen": -388.3199462890625, "logps/rejected": -355.18719482421875, "loss": 0.6255, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.9922312498092651, "rewards/margins": 0.24085617065429688, "rewards/rejected": -1.2330873012542725, "step": 2520 }, { "epoch": 0.33, "eval_logits/chosen": -2.5428717136383057, "eval_logits/rejected": -2.5581729412078857, "eval_logps/chosen": -435.9595947265625, "eval_logps/rejected": -433.4538269042969, "eval_loss": 0.6296377182006836, "eval_rewards/accuracies": 0.6389999985694885, "eval_rewards/chosen": -1.032102108001709, "eval_rewards/margins": 0.2913359999656677, "eval_rewards/rejected": -1.3234381675720215, "eval_runtime": 197.3217, "eval_samples_per_second": 10.136, "eval_steps_per_second": 5.068, "step": 2520 }, { "epoch": 0.33, "learning_rate": 4.2302260233260025e-06, "logits/chosen": -2.712089776992798, "logits/rejected": -2.762547731399536, "logps/chosen": -442.83929443359375, "logps/rejected": -461.90924072265625, "loss": 0.6172, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0242395401000977, "rewards/margins": 0.3576509356498718, "rewards/rejected": -1.3818905353546143, "step": 2530 }, { "epoch": 0.33, "eval_logits/chosen": -2.53926420211792, "eval_logits/rejected": -2.5550448894500732, "eval_logps/chosen": -437.32904052734375, "eval_logps/rejected": -435.19122314453125, "eval_loss": 0.63003009557724, "eval_rewards/accuracies": 0.640999972820282, "eval_rewards/chosen": -1.0457963943481445, "eval_rewards/margins": 0.29501575231552124, "eval_rewards/rejected": -1.340812087059021, "eval_runtime": 197.2545, "eval_samples_per_second": 10.139, "eval_steps_per_second": 5.07, "step": 2530 }, { "epoch": 0.33, "learning_rate": 4.2219632444127766e-06, "logits/chosen": -2.6461236476898193, "logits/rejected": -2.662266969680786, "logps/chosen": -439.08544921875, "logps/rejected": -439.2572326660156, "loss": 0.6536, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9915159344673157, "rewards/margins": 0.23148474097251892, "rewards/rejected": -1.2230005264282227, "step": 2540 }, { "epoch": 0.33, "eval_logits/chosen": -2.5464367866516113, "eval_logits/rejected": -2.5626463890075684, "eval_logps/chosen": -430.1457824707031, "eval_logps/rejected": -426.9624938964844, "eval_loss": 0.6277977824211121, "eval_rewards/accuracies": 0.6434999704360962, "eval_rewards/chosen": -0.9739632606506348, "eval_rewards/margins": 0.2845614552497864, "eval_rewards/rejected": -1.2585248947143555, "eval_runtime": 196.8842, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 2540 }, { "epoch": 0.33, "learning_rate": 4.213664519453115e-06, "logits/chosen": -2.822821617126465, "logits/rejected": -2.768632650375366, "logps/chosen": -404.8807373046875, "logps/rejected": -409.8775939941406, "loss": 0.6565, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.0015974044799805, "rewards/margins": 0.19808810949325562, "rewards/rejected": -1.1996854543685913, "step": 2550 }, { "epoch": 0.33, "eval_logits/chosen": -2.560555934906006, "eval_logits/rejected": -2.576713800430298, "eval_logps/chosen": -421.6891174316406, "eval_logps/rejected": -416.94012451171875, "eval_loss": 0.6262774467468262, "eval_rewards/accuracies": 0.6399999856948853, "eval_rewards/chosen": -0.8893969655036926, "eval_rewards/margins": 0.26890408992767334, "eval_rewards/rejected": -1.1583009958267212, "eval_runtime": 196.7892, "eval_samples_per_second": 10.163, "eval_steps_per_second": 5.082, "step": 2550 }, { "epoch": 0.33, "learning_rate": 4.205330021683208e-06, "logits/chosen": -2.661653995513916, "logits/rejected": -2.6717755794525146, "logps/chosen": -348.31427001953125, "logps/rejected": -350.11859130859375, "loss": 0.6676, "rewards/accuracies": 0.625, "rewards/chosen": -0.7914212346076965, "rewards/margins": 0.11484186351299286, "rewards/rejected": -0.9062630534172058, "step": 2560 }, { "epoch": 0.33, "eval_logits/chosen": -2.56754207611084, "eval_logits/rejected": -2.583657741546631, "eval_logps/chosen": -412.23150634765625, "eval_logps/rejected": -405.8481750488281, "eval_loss": 0.6265344023704529, "eval_rewards/accuracies": 0.6439999938011169, "eval_rewards/chosen": -0.7948205471038818, "eval_rewards/margins": 0.25256121158599854, "eval_rewards/rejected": -1.04738187789917, "eval_runtime": 196.8811, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 2560 }, { "epoch": 0.34, "learning_rate": 4.196959925086008e-06, "logits/chosen": -2.756273031234741, "logits/rejected": -2.7312004566192627, "logps/chosen": -399.8543701171875, "logps/rejected": -426.33099365234375, "loss": 0.6483, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.7400082349777222, "rewards/margins": 0.16529114544391632, "rewards/rejected": -0.9052993655204773, "step": 2570 }, { "epoch": 0.34, "eval_logits/chosen": -2.579993724822998, "eval_logits/rejected": -2.595771074295044, "eval_logps/chosen": -402.87548828125, "eval_logps/rejected": -394.6590576171875, "eval_loss": 0.627536952495575, "eval_rewards/accuracies": 0.6489999890327454, "eval_rewards/chosen": -0.701261043548584, "eval_rewards/margins": 0.23422937095165253, "eval_rewards/rejected": -0.9354904890060425, "eval_runtime": 197.3793, "eval_samples_per_second": 10.133, "eval_steps_per_second": 5.066, "step": 2570 }, { "epoch": 0.34, "learning_rate": 4.188554404387588e-06, "logits/chosen": -2.831542730331421, "logits/rejected": -2.8460183143615723, "logps/chosen": -430.8309020996094, "logps/rejected": -411.8692321777344, "loss": 0.6541, "rewards/accuracies": 0.625, "rewards/chosen": -0.7262514233589172, "rewards/margins": 0.167301207780838, "rewards/rejected": -0.8935526609420776, "step": 2580 }, { "epoch": 0.34, "eval_logits/chosen": -2.590602159500122, "eval_logits/rejected": -2.606855630874634, "eval_logps/chosen": -401.1507873535156, "eval_logps/rejected": -392.6910095214844, "eval_loss": 0.6273356676101685, "eval_rewards/accuracies": 0.6495000123977661, "eval_rewards/chosen": -0.6840137839317322, "eval_rewards/margins": 0.23179614543914795, "eval_rewards/rejected": -0.9158099293708801, "eval_runtime": 196.9199, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 2580 }, { "epoch": 0.34, "learning_rate": 4.180113635053504e-06, "logits/chosen": -2.8526382446289062, "logits/rejected": -2.837333917617798, "logps/chosen": -375.9033508300781, "logps/rejected": -425.47607421875, "loss": 0.6052, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.7050365805625916, "rewards/margins": 0.3069326877593994, "rewards/rejected": -1.0119692087173462, "step": 2590 }, { "epoch": 0.34, "eval_logits/chosen": -2.5913565158843994, "eval_logits/rejected": -2.6081368923187256, "eval_logps/chosen": -404.6280822753906, "eval_logps/rejected": -396.76959228515625, "eval_loss": 0.6276716589927673, "eval_rewards/accuracies": 0.6504999995231628, "eval_rewards/chosen": -0.7187868356704712, "eval_rewards/margins": 0.23780903220176697, "eval_rewards/rejected": -0.9565958976745605, "eval_runtime": 196.9018, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 2590 }, { "epoch": 0.34, "learning_rate": 4.17163779328513e-06, "logits/chosen": -2.7927684783935547, "logits/rejected": -2.7561044692993164, "logps/chosen": -401.1669921875, "logps/rejected": -393.67791748046875, "loss": 0.6153, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.6651914715766907, "rewards/margins": 0.30985796451568604, "rewards/rejected": -0.9750493764877319, "step": 2600 }, { "epoch": 0.34, "eval_logits/chosen": -2.5893898010253906, "eval_logits/rejected": -2.6071102619171143, "eval_logps/chosen": -411.3906555175781, "eval_logps/rejected": -404.7594909667969, "eval_loss": 0.6282112002372742, "eval_rewards/accuracies": 0.6395000219345093, "eval_rewards/chosen": -0.7864127159118652, "eval_rewards/margins": 0.2500820457935333, "eval_rewards/rejected": -1.0364947319030762, "eval_runtime": 197.2993, "eval_samples_per_second": 10.137, "eval_steps_per_second": 5.068, "step": 2600 }, { "epoch": 0.34, "learning_rate": 4.163127056015975e-06, "logits/chosen": -2.7800028324127197, "logits/rejected": -2.7452735900878906, "logps/chosen": -428.4466857910156, "logps/rejected": -435.05194091796875, "loss": 0.616, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.7805007100105286, "rewards/margins": 0.31978195905685425, "rewards/rejected": -1.1002826690673828, "step": 2610 }, { "epoch": 0.34, "eval_logits/chosen": -2.590467691421509, "eval_logits/rejected": -2.608050584793091, "eval_logps/chosen": -417.75006103515625, "eval_logps/rejected": -411.94488525390625, "eval_loss": 0.6286919116973877, "eval_rewards/accuracies": 0.6380000114440918, "eval_rewards/chosen": -0.8500065207481384, "eval_rewards/margins": 0.258341908454895, "eval_rewards/rejected": -1.1083483695983887, "eval_runtime": 196.9134, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.078, "step": 2610 }, { "epoch": 0.34, "learning_rate": 4.154581600907994e-06, "logits/chosen": -2.7846765518188477, "logits/rejected": -2.7442469596862793, "logps/chosen": -391.93023681640625, "logps/rejected": -392.11932373046875, "loss": 0.5428, "rewards/accuracies": 0.75, "rewards/chosen": -0.7660864591598511, "rewards/margins": 0.4419211447238922, "rewards/rejected": -1.208007574081421, "step": 2620 }, { "epoch": 0.34, "eval_logits/chosen": -2.6004793643951416, "eval_logits/rejected": -2.6181156635284424, "eval_logps/chosen": -427.6159362792969, "eval_logps/rejected": -423.33331298828125, "eval_loss": 0.6286585927009583, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": -0.9486655592918396, "eval_rewards/margins": 0.2735675275325775, "eval_rewards/rejected": -1.2222331762313843, "eval_runtime": 197.0103, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 2620 }, { "epoch": 0.34, "learning_rate": 4.14600160634788e-06, "logits/chosen": -2.7774970531463623, "logits/rejected": -2.7458691596984863, "logps/chosen": -388.33575439453125, "logps/rejected": -434.8145446777344, "loss": 0.5982, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9897419214248657, "rewards/margins": 0.3823489546775818, "rewards/rejected": -1.3720909357070923, "step": 2630 }, { "epoch": 0.34, "eval_logits/chosen": -2.5988712310791016, "eval_logits/rejected": -2.61651873588562, "eval_logps/chosen": -435.7413024902344, "eval_logps/rejected": -432.87921142578125, "eval_loss": 0.630751371383667, "eval_rewards/accuracies": 0.6384999752044678, "eval_rewards/chosen": -1.029918909072876, "eval_rewards/margins": 0.2877727448940277, "eval_rewards/rejected": -1.3176918029785156, "eval_runtime": 197.0253, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 2630 }, { "epoch": 0.35, "learning_rate": 4.137387251443335e-06, "logits/chosen": -2.788888931274414, "logits/rejected": -2.7759616374969482, "logps/chosen": -409.748291015625, "logps/rejected": -384.11199951171875, "loss": 0.609, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9789314270019531, "rewards/margins": 0.31255120038986206, "rewards/rejected": -1.29148268699646, "step": 2640 }, { "epoch": 0.35, "eval_logits/chosen": -2.6059696674346924, "eval_logits/rejected": -2.623617649078369, "eval_logps/chosen": -432.4779357910156, "eval_logps/rejected": -429.3039245605469, "eval_loss": 0.629350483417511, "eval_rewards/accuracies": 0.6359999775886536, "eval_rewards/chosen": -0.9972848892211914, "eval_rewards/margins": 0.2846539616584778, "eval_rewards/rejected": -1.281938910484314, "eval_runtime": 196.9399, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.078, "step": 2640 }, { "epoch": 0.35, "learning_rate": 4.128738716019338e-06, "logits/chosen": -2.7614262104034424, "logits/rejected": -2.7496438026428223, "logps/chosen": -448.43798828125, "logps/rejected": -452.8334045410156, "loss": 0.5885, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.8865548968315125, "rewards/margins": 0.39715567231178284, "rewards/rejected": -1.2837104797363281, "step": 2650 }, { "epoch": 0.35, "eval_logits/chosen": -2.608200788497925, "eval_logits/rejected": -2.6260952949523926, "eval_logps/chosen": -431.5092468261719, "eval_logps/rejected": -428.17840576171875, "eval_loss": 0.6292994618415833, "eval_rewards/accuracies": 0.6349999904632568, "eval_rewards/chosen": -0.987598717212677, "eval_rewards/margins": 0.28308507800102234, "eval_rewards/rejected": -1.2706836462020874, "eval_runtime": 196.8459, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 2650 }, { "epoch": 0.35, "learning_rate": 4.120056180614386e-06, "logits/chosen": -2.6786999702453613, "logits/rejected": -2.662436008453369, "logps/chosen": -402.8612365722656, "logps/rejected": -436.7867126464844, "loss": 0.6198, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0017694234848022, "rewards/margins": 0.2881324291229248, "rewards/rejected": -1.2899019718170166, "step": 2660 }, { "epoch": 0.35, "eval_logits/chosen": -2.5987579822540283, "eval_logits/rejected": -2.618000030517578, "eval_logps/chosen": -432.24072265625, "eval_logps/rejected": -429.0343933105469, "eval_loss": 0.630684494972229, "eval_rewards/accuracies": 0.6355000138282776, "eval_rewards/chosen": -0.9949126243591309, "eval_rewards/margins": 0.28433096408843994, "eval_rewards/rejected": -1.2792433500289917, "eval_runtime": 197.0952, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.074, "step": 2660 }, { "epoch": 0.35, "learning_rate": 4.111339826476725e-06, "logits/chosen": -2.7120726108551025, "logits/rejected": -2.7115330696105957, "logps/chosen": -393.4505310058594, "logps/rejected": -416.03753662109375, "loss": 0.6223, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.0049831867218018, "rewards/margins": 0.28646284341812134, "rewards/rejected": -1.2914460897445679, "step": 2670 }, { "epoch": 0.35, "eval_logits/chosen": -2.58608341217041, "eval_logits/rejected": -2.6061620712280273, "eval_logps/chosen": -433.83221435546875, "eval_logps/rejected": -431.00537109375, "eval_loss": 0.6331284046173096, "eval_rewards/accuracies": 0.6340000033378601, "eval_rewards/chosen": -1.010827660560608, "eval_rewards/margins": 0.2881257236003876, "eval_rewards/rejected": -1.2989535331726074, "eval_runtime": 196.8111, "eval_samples_per_second": 10.162, "eval_steps_per_second": 5.081, "step": 2670 }, { "epoch": 0.35, "learning_rate": 4.102589835560572e-06, "logits/chosen": -2.7702507972717285, "logits/rejected": -2.7156424522399902, "logps/chosen": -487.67431640625, "logps/rejected": -437.3570251464844, "loss": 0.6479, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.9811779856681824, "rewards/margins": 0.17895013093948364, "rewards/rejected": -1.1601279973983765, "step": 2680 }, { "epoch": 0.35, "eval_logits/chosen": -2.5870747566223145, "eval_logits/rejected": -2.607055902481079, "eval_logps/chosen": -432.8543701171875, "eval_logps/rejected": -430.0517272949219, "eval_loss": 0.6314911842346191, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": -1.0010497570037842, "eval_rewards/margins": 0.2883668541908264, "eval_rewards/rejected": -1.2894165515899658, "eval_runtime": 196.9253, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 2680 }, { "epoch": 0.35, "learning_rate": 4.09380639052231e-06, "logits/chosen": -2.758643627166748, "logits/rejected": -2.779642105102539, "logps/chosen": -446.88006591796875, "logps/rejected": -497.0462951660156, "loss": 0.5703, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9354826211929321, "rewards/margins": 0.4081154763698578, "rewards/rejected": -1.3435981273651123, "step": 2690 }, { "epoch": 0.35, "eval_logits/chosen": -2.585550308227539, "eval_logits/rejected": -2.605078935623169, "eval_logps/chosen": -436.9498291015625, "eval_logps/rejected": -434.83868408203125, "eval_loss": 0.6306910514831543, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": -1.0420043468475342, "eval_rewards/margins": 0.2952825129032135, "eval_rewards/rejected": -1.3372868299484253, "eval_runtime": 197.3558, "eval_samples_per_second": 10.134, "eval_steps_per_second": 5.067, "step": 2690 }, { "epoch": 0.35, "learning_rate": 4.084989674716679e-06, "logits/chosen": -2.7644388675689697, "logits/rejected": -2.6968836784362793, "logps/chosen": -450.021484375, "logps/rejected": -462.6543884277344, "loss": 0.6217, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0556700229644775, "rewards/margins": 0.2913525700569153, "rewards/rejected": -1.3470226526260376, "step": 2700 }, { "epoch": 0.35, "eval_logits/chosen": -2.588069200515747, "eval_logits/rejected": -2.607356309890747, "eval_logps/chosen": -439.2960510253906, "eval_logps/rejected": -437.5450439453125, "eval_loss": 0.6307061910629272, "eval_rewards/accuracies": 0.6334999799728394, "eval_rewards/chosen": -1.065466284751892, "eval_rewards/margins": 0.2988835871219635, "eval_rewards/rejected": -1.3643499612808228, "eval_runtime": 197.153, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 2700 }, { "epoch": 0.35, "learning_rate": 4.076139872192949e-06, "logits/chosen": -2.795623302459717, "logits/rejected": -2.7657852172851562, "logps/chosen": -493.7920837402344, "logps/rejected": -456.6087951660156, "loss": 0.6502, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1028454303741455, "rewards/margins": 0.2741045653820038, "rewards/rejected": -1.3769499063491821, "step": 2710 }, { "epoch": 0.35, "eval_logits/chosen": -2.6020870208740234, "eval_logits/rejected": -2.621488332748413, "eval_logps/chosen": -431.4540710449219, "eval_logps/rejected": -428.3634033203125, "eval_loss": 0.6275376081466675, "eval_rewards/accuracies": 0.6349999904632568, "eval_rewards/chosen": -0.9870465993881226, "eval_rewards/margins": 0.28548726439476013, "eval_rewards/rejected": -1.2725337743759155, "eval_runtime": 197.0663, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.074, "step": 2710 }, { "epoch": 0.36, "learning_rate": 4.067257167691074e-06, "logits/chosen": -2.77093768119812, "logits/rejected": -2.799267292022705, "logps/chosen": -462.2300720214844, "logps/rejected": -478.0562438964844, "loss": 0.6013, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9543863534927368, "rewards/margins": 0.37834474444389343, "rewards/rejected": -1.332731008529663, "step": 2720 }, { "epoch": 0.36, "eval_logits/chosen": -2.616654872894287, "eval_logits/rejected": -2.635721445083618, "eval_logps/chosen": -424.5511169433594, "eval_logps/rejected": -420.2796325683594, "eval_loss": 0.6254580020904541, "eval_rewards/accuracies": 0.6380000114440918, "eval_rewards/chosen": -0.9180174469947815, "eval_rewards/margins": 0.27367839217185974, "eval_rewards/rejected": -1.1916959285736084, "eval_runtime": 197.111, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.073, "step": 2720 }, { "epoch": 0.36, "learning_rate": 4.05834174663784e-06, "logits/chosen": -2.8080220222473145, "logits/rejected": -2.8545610904693604, "logps/chosen": -444.73626708984375, "logps/rejected": -422.92510986328125, "loss": 0.643, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.9885362386703491, "rewards/margins": 0.29533851146698, "rewards/rejected": -1.283874750137329, "step": 2730 }, { "epoch": 0.36, "eval_logits/chosen": -2.6182029247283936, "eval_logits/rejected": -2.637312173843384, "eval_logps/chosen": -424.8026123046875, "eval_logps/rejected": -420.6578674316406, "eval_loss": 0.6249555945396423, "eval_rewards/accuracies": 0.6399999856948853, "eval_rewards/chosen": -0.9205319881439209, "eval_rewards/margins": 0.27494680881500244, "eval_rewards/rejected": -1.1954787969589233, "eval_runtime": 196.5399, "eval_samples_per_second": 10.176, "eval_steps_per_second": 5.088, "step": 2730 }, { "epoch": 0.36, "learning_rate": 4.0493937951429895e-06, "logits/chosen": -2.8887510299682617, "logits/rejected": -2.891409397125244, "logps/chosen": -423.8211364746094, "logps/rejected": -397.92938232421875, "loss": 0.6072, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.8922034502029419, "rewards/margins": 0.3024117350578308, "rewards/rejected": -1.194615125656128, "step": 2740 }, { "epoch": 0.36, "eval_logits/chosen": -2.6147515773773193, "eval_logits/rejected": -2.6340131759643555, "eval_logps/chosen": -425.80096435546875, "eval_logps/rejected": -421.8180236816406, "eval_loss": 0.6246365308761597, "eval_rewards/accuracies": 0.640999972820282, "eval_rewards/chosen": -0.930515468120575, "eval_rewards/margins": 0.2765650153160095, "eval_rewards/rejected": -1.207080364227295, "eval_runtime": 196.9303, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 2740 }, { "epoch": 0.36, "learning_rate": 4.040413499995343e-06, "logits/chosen": -2.8133509159088135, "logits/rejected": -2.780090570449829, "logps/chosen": -462.28973388671875, "logps/rejected": -461.85150146484375, "loss": 0.6327, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.9306343197822571, "rewards/margins": 0.24961963295936584, "rewards/rejected": -1.1802538633346558, "step": 2750 }, { "epoch": 0.36, "eval_logits/chosen": -2.6044232845306396, "eval_logits/rejected": -2.624067783355713, "eval_logps/chosen": -425.9725036621094, "eval_logps/rejected": -421.98956298828125, "eval_loss": 0.6255431175231934, "eval_rewards/accuracies": 0.6384999752044678, "eval_rewards/chosen": -0.9322309494018555, "eval_rewards/margins": 0.2765646278858185, "eval_rewards/rejected": -1.208795428276062, "eval_runtime": 196.8045, "eval_samples_per_second": 10.162, "eval_steps_per_second": 5.081, "step": 2750 }, { "epoch": 0.36, "learning_rate": 4.031401048658892e-06, "logits/chosen": -2.771268844604492, "logits/rejected": -2.744429111480713, "logps/chosen": -424.1585388183594, "logps/rejected": -430.1105041503906, "loss": 0.5996, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.855022132396698, "rewards/margins": 0.3389972746372223, "rewards/rejected": -1.1940194368362427, "step": 2760 }, { "epoch": 0.36, "eval_logits/chosen": -2.6032989025115967, "eval_logits/rejected": -2.622894048690796, "eval_logps/chosen": -422.4579772949219, "eval_logps/rejected": -417.8634948730469, "eval_loss": 0.625076949596405, "eval_rewards/accuracies": 0.640999972820282, "eval_rewards/chosen": -0.8970851898193359, "eval_rewards/margins": 0.27044978737831116, "eval_rewards/rejected": -1.1675349473953247, "eval_runtime": 196.8433, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 2760 }, { "epoch": 0.36, "learning_rate": 4.022356629268894e-06, "logits/chosen": -2.7860825061798096, "logits/rejected": -2.7870800495147705, "logps/chosen": -439.6736755371094, "logps/rejected": -396.2608947753906, "loss": 0.7056, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.9059756994247437, "rewards/margins": 0.08050543814897537, "rewards/rejected": -0.9864810109138489, "step": 2770 }, { "epoch": 0.36, "eval_logits/chosen": -2.6097211837768555, "eval_logits/rejected": -2.628533124923706, "eval_logps/chosen": -416.4322814941406, "eval_logps/rejected": -410.8010559082031, "eval_loss": 0.6238117218017578, "eval_rewards/accuracies": 0.640999972820282, "eval_rewards/chosen": -0.8368287086486816, "eval_rewards/margins": 0.2600819170475006, "eval_rewards/rejected": -1.0969105958938599, "eval_runtime": 196.8627, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.08, "step": 2770 }, { "epoch": 0.36, "learning_rate": 4.013280430627936e-06, "logits/chosen": -2.759000301361084, "logits/rejected": -2.755174160003662, "logps/chosen": -378.8783264160156, "logps/rejected": -374.7305603027344, "loss": 0.606, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.713367760181427, "rewards/margins": 0.2676360309123993, "rewards/rejected": -0.9810037612915039, "step": 2780 }, { "epoch": 0.36, "eval_logits/chosen": -2.599520444869995, "eval_logits/rejected": -2.6183393001556396, "eval_logps/chosen": -417.6534118652344, "eval_logps/rejected": -412.40203857421875, "eval_loss": 0.6237169504165649, "eval_rewards/accuracies": 0.6399999856948853, "eval_rewards/chosen": -0.849040150642395, "eval_rewards/margins": 0.26388019323349, "eval_rewards/rejected": -1.1129202842712402, "eval_runtime": 196.8922, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 2780 }, { "epoch": 0.37, "learning_rate": 4.004172642202002e-06, "logits/chosen": -2.7675366401672363, "logits/rejected": -2.753002643585205, "logps/chosen": -393.2950439453125, "logps/rejected": -378.9779052734375, "loss": 0.5751, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.8302526473999023, "rewards/margins": 0.34783655405044556, "rewards/rejected": -1.1780892610549927, "step": 2790 }, { "epoch": 0.37, "eval_logits/chosen": -2.59600830078125, "eval_logits/rejected": -2.6151397228240967, "eval_logps/chosen": -416.169677734375, "eval_logps/rejected": -410.8311767578125, "eval_loss": 0.6246668100357056, "eval_rewards/accuracies": 0.6455000042915344, "eval_rewards/chosen": -0.8342025876045227, "eval_rewards/margins": 0.2630092203617096, "eval_rewards/rejected": -1.0972118377685547, "eval_runtime": 197.1998, "eval_samples_per_second": 10.142, "eval_steps_per_second": 5.071, "step": 2790 }, { "epoch": 0.37, "learning_rate": 3.995033454116512e-06, "logits/chosen": -2.806318759918213, "logits/rejected": -2.800372362136841, "logps/chosen": -448.93524169921875, "logps/rejected": -423.07574462890625, "loss": 0.6504, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.8231765627861023, "rewards/margins": 0.2010866403579712, "rewards/rejected": -1.0242632627487183, "step": 2800 }, { "epoch": 0.37, "eval_logits/chosen": -2.599107027053833, "eval_logits/rejected": -2.6187636852264404, "eval_logps/chosen": -414.1667785644531, "eval_logps/rejected": -408.644287109375, "eval_loss": 0.6250008344650269, "eval_rewards/accuracies": 0.6424999833106995, "eval_rewards/chosen": -0.8141741156578064, "eval_rewards/margins": 0.26116856932640076, "eval_rewards/rejected": -1.0753426551818848, "eval_runtime": 196.7704, "eval_samples_per_second": 10.164, "eval_steps_per_second": 5.082, "step": 2800 }, { "epoch": 0.37, "learning_rate": 3.985863057152355e-06, "logits/chosen": -2.734070301055908, "logits/rejected": -2.781536817550659, "logps/chosen": -441.91015625, "logps/rejected": -449.7099609375, "loss": 0.5513, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.689423143863678, "rewards/margins": 0.409343421459198, "rewards/rejected": -1.098766565322876, "step": 2810 }, { "epoch": 0.37, "eval_logits/chosen": -2.59348201751709, "eval_logits/rejected": -2.6137006282806396, "eval_logps/chosen": -421.896728515625, "eval_logps/rejected": -417.6894226074219, "eval_loss": 0.625147819519043, "eval_rewards/accuracies": 0.6474999785423279, "eval_rewards/chosen": -0.8914732933044434, "eval_rewards/margins": 0.27432069182395935, "eval_rewards/rejected": -1.1657938957214355, "eval_runtime": 196.9116, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.078, "step": 2810 }, { "epoch": 0.37, "learning_rate": 3.976661642741908e-06, "logits/chosen": -2.7606282234191895, "logits/rejected": -2.7800581455230713, "logps/chosen": -410.58966064453125, "logps/rejected": -452.03851318359375, "loss": 0.5198, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.8148131370544434, "rewards/margins": 0.4949001669883728, "rewards/rejected": -1.3097132444381714, "step": 2820 }, { "epoch": 0.37, "eval_logits/chosen": -2.578408718109131, "eval_logits/rejected": -2.5996177196502686, "eval_logps/chosen": -438.55657958984375, "eval_logps/rejected": -437.1532287597656, "eval_loss": 0.6287716627120972, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": -1.0580713748931885, "eval_rewards/margins": 0.30236053466796875, "eval_rewards/rejected": -1.3604320287704468, "eval_runtime": 197.0899, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 2820 }, { "epoch": 0.37, "learning_rate": 3.967429402965035e-06, "logits/chosen": -2.628810405731201, "logits/rejected": -2.6278045177459717, "logps/chosen": -470.0814514160156, "logps/rejected": -483.7037658691406, "loss": 0.5981, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0495673418045044, "rewards/margins": 0.3852699398994446, "rewards/rejected": -1.4348372220993042, "step": 2830 }, { "epoch": 0.37, "eval_logits/chosen": -2.5737946033477783, "eval_logits/rejected": -2.595820426940918, "eval_logps/chosen": -449.00238037109375, "eval_logps/rejected": -449.2751770019531, "eval_loss": 0.6319224834442139, "eval_rewards/accuracies": 0.6355000138282776, "eval_rewards/chosen": -1.162529468536377, "eval_rewards/margins": 0.3191223740577698, "eval_rewards/rejected": -1.481651782989502, "eval_runtime": 196.8504, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 2830 }, { "epoch": 0.37, "learning_rate": 3.958166530545085e-06, "logits/chosen": -2.759307861328125, "logits/rejected": -2.7708866596221924, "logps/chosen": -453.6480407714844, "logps/rejected": -466.7681579589844, "loss": 0.6637, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.2561802864074707, "rewards/margins": 0.22925932705402374, "rewards/rejected": -1.4854395389556885, "step": 2840 }, { "epoch": 0.37, "eval_logits/chosen": -2.5727250576019287, "eval_logits/rejected": -2.594754695892334, "eval_logps/chosen": -452.2230529785156, "eval_logps/rejected": -453.07086181640625, "eval_loss": 0.6315993666648865, "eval_rewards/accuracies": 0.6334999799728394, "eval_rewards/chosen": -1.1947364807128906, "eval_rewards/margins": 0.3248724937438965, "eval_rewards/rejected": -1.519608974456787, "eval_runtime": 197.1339, "eval_samples_per_second": 10.145, "eval_steps_per_second": 5.073, "step": 2840 }, { "epoch": 0.37, "learning_rate": 3.948873218844863e-06, "logits/chosen": -2.6876654624938965, "logits/rejected": -2.7408440113067627, "logps/chosen": -378.585693359375, "logps/rejected": -445.4602966308594, "loss": 0.6351, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.1722004413604736, "rewards/margins": 0.27464979887008667, "rewards/rejected": -1.446850299835205, "step": 2850 }, { "epoch": 0.37, "eval_logits/chosen": -2.570188283920288, "eval_logits/rejected": -2.592709541320801, "eval_logps/chosen": -452.1654357910156, "eval_logps/rejected": -453.17291259765625, "eval_loss": 0.6327278017997742, "eval_rewards/accuracies": 0.6349999904632568, "eval_rewards/chosen": -1.1941603422164917, "eval_rewards/margins": 0.32646846771240234, "eval_rewards/rejected": -1.520628809928894, "eval_runtime": 197.202, "eval_samples_per_second": 10.142, "eval_steps_per_second": 5.071, "step": 2850 }, { "epoch": 0.37, "learning_rate": 3.939549661862592e-06, "logits/chosen": -2.680032253265381, "logits/rejected": -2.698355197906494, "logps/chosen": -455.81622314453125, "logps/rejected": -460.41375732421875, "loss": 0.6009, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.1447904109954834, "rewards/margins": 0.4577345848083496, "rewards/rejected": -1.602524757385254, "step": 2860 }, { "epoch": 0.37, "eval_logits/chosen": -2.5797622203826904, "eval_logits/rejected": -2.6020236015319824, "eval_logps/chosen": -452.1235656738281, "eval_logps/rejected": -453.2584228515625, "eval_loss": 0.6323604583740234, "eval_rewards/accuracies": 0.6370000243186951, "eval_rewards/chosen": -1.1937412023544312, "eval_rewards/margins": 0.3277431130409241, "eval_rewards/rejected": -1.5214842557907104, "eval_runtime": 196.9343, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 2860 }, { "epoch": 0.38, "learning_rate": 3.930196054227871e-06, "logits/chosen": -2.7388813495635986, "logits/rejected": -2.705418586730957, "logps/chosen": -421.400634765625, "logps/rejected": -426.3876037597656, "loss": 0.6586, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.195313572883606, "rewards/margins": 0.27158278226852417, "rewards/rejected": -1.4668962955474854, "step": 2870 }, { "epoch": 0.38, "eval_logits/chosen": -2.592318058013916, "eval_logits/rejected": -2.6147069931030273, "eval_logps/chosen": -444.8150634765625, "eval_logps/rejected": -444.9077453613281, "eval_loss": 0.6291281580924988, "eval_rewards/accuracies": 0.6420000195503235, "eval_rewards/chosen": -1.1206568479537964, "eval_rewards/margins": 0.3173206150531769, "eval_rewards/rejected": -1.4379774332046509, "eval_runtime": 197.0659, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.074, "step": 2870 }, { "epoch": 0.38, "learning_rate": 3.920812591197604e-06, "logits/chosen": -2.73275089263916, "logits/rejected": -2.720738410949707, "logps/chosen": -427.51416015625, "logps/rejected": -424.65313720703125, "loss": 0.5718, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0417402982711792, "rewards/margins": 0.4426051080226898, "rewards/rejected": -1.484345555305481, "step": 2880 }, { "epoch": 0.38, "eval_logits/chosen": -2.5997185707092285, "eval_logits/rejected": -2.622008800506592, "eval_logps/chosen": -433.97705078125, "eval_logps/rejected": -432.5858154296875, "eval_loss": 0.6275606155395508, "eval_rewards/accuracies": 0.6430000066757202, "eval_rewards/chosen": -1.012276291847229, "eval_rewards/margins": 0.3024812638759613, "eval_rewards/rejected": -1.3147575855255127, "eval_runtime": 196.8295, "eval_samples_per_second": 10.161, "eval_steps_per_second": 5.081, "step": 2880 }, { "epoch": 0.38, "learning_rate": 3.9113994686519305e-06, "logits/chosen": -2.7557740211486816, "logits/rejected": -2.757719039916992, "logps/chosen": -435.67156982421875, "logps/rejected": -442.53753662109375, "loss": 0.6098, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9510415196418762, "rewards/margins": 0.32140472531318665, "rewards/rejected": -1.2724463939666748, "step": 2890 }, { "epoch": 0.38, "eval_logits/chosen": -2.592928171157837, "eval_logits/rejected": -2.6148271560668945, "eval_logps/chosen": -428.5566101074219, "eval_logps/rejected": -426.573486328125, "eval_loss": 0.6265643239021301, "eval_rewards/accuracies": 0.6464999914169312, "eval_rewards/chosen": -0.9580718278884888, "eval_rewards/margins": 0.2965623438358307, "eval_rewards/rejected": -1.2546342611312866, "eval_runtime": 196.8971, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 2890 }, { "epoch": 0.38, "learning_rate": 3.90195688309013e-06, "logits/chosen": -2.7411415576934814, "logits/rejected": -2.716850757598877, "logps/chosen": -407.24639892578125, "logps/rejected": -394.56671142578125, "loss": 0.6727, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.9749676585197449, "rewards/margins": 0.18547670543193817, "rewards/rejected": -1.1604443788528442, "step": 2900 }, { "epoch": 0.38, "eval_logits/chosen": -2.587193250656128, "eval_logits/rejected": -2.6085598468780518, "eval_logps/chosen": -424.8428955078125, "eval_logps/rejected": -422.52862548828125, "eval_loss": 0.6264001131057739, "eval_rewards/accuracies": 0.6495000123977661, "eval_rewards/chosen": -0.9209350347518921, "eval_rewards/margins": 0.2932513654232025, "eval_rewards/rejected": -1.2141865491867065, "eval_runtime": 196.8371, "eval_samples_per_second": 10.161, "eval_steps_per_second": 5.08, "step": 2900 }, { "epoch": 0.38, "learning_rate": 3.892485031626527e-06, "logits/chosen": -2.7525322437286377, "logits/rejected": -2.740018129348755, "logps/chosen": -405.2106018066406, "logps/rejected": -415.08624267578125, "loss": 0.6066, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.8530766367912292, "rewards/margins": 0.3423798978328705, "rewards/rejected": -1.1954563856124878, "step": 2910 }, { "epoch": 0.38, "eval_logits/chosen": -2.585651159286499, "eval_logits/rejected": -2.606193780899048, "eval_logps/chosen": -422.63525390625, "eval_logps/rejected": -420.05523681640625, "eval_loss": 0.624978244304657, "eval_rewards/accuracies": 0.6489999890327454, "eval_rewards/chosen": -0.8988585472106934, "eval_rewards/margins": 0.2905937731266022, "eval_rewards/rejected": -1.1894524097442627, "eval_runtime": 196.9551, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 2910 }, { "epoch": 0.38, "learning_rate": 3.882984111986371e-06, "logits/chosen": -2.739992141723633, "logits/rejected": -2.7450668811798096, "logps/chosen": -434.47314453125, "logps/rejected": -429.2943420410156, "loss": 0.6094, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8825608491897583, "rewards/margins": 0.3956514298915863, "rewards/rejected": -1.2782121896743774, "step": 2920 }, { "epoch": 0.38, "eval_logits/chosen": -2.5807759761810303, "eval_logits/rejected": -2.6009206771850586, "eval_logps/chosen": -420.12359619140625, "eval_logps/rejected": -417.17828369140625, "eval_loss": 0.6240187883377075, "eval_rewards/accuracies": 0.6480000019073486, "eval_rewards/chosen": -0.8737419247627258, "eval_rewards/margins": 0.28694066405296326, "eval_rewards/rejected": -1.1606824398040771, "eval_runtime": 196.9637, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 2920 }, { "epoch": 0.38, "learning_rate": 3.873454322501711e-06, "logits/chosen": -2.7816436290740967, "logits/rejected": -2.789374589920044, "logps/chosen": -427.786376953125, "logps/rejected": -419.85321044921875, "loss": 0.5938, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.7983767986297607, "rewards/margins": 0.3398052752017975, "rewards/rejected": -1.1381819248199463, "step": 2930 }, { "epoch": 0.38, "eval_logits/chosen": -2.579239845275879, "eval_logits/rejected": -2.5991451740264893, "eval_logps/chosen": -418.7933654785156, "eval_logps/rejected": -415.671630859375, "eval_loss": 0.6238669753074646, "eval_rewards/accuracies": 0.6480000019073486, "eval_rewards/chosen": -0.8604398369789124, "eval_rewards/margins": 0.28517666459083557, "eval_rewards/rejected": -1.1456164121627808, "eval_runtime": 197.282, "eval_samples_per_second": 10.138, "eval_steps_per_second": 5.069, "step": 2930 }, { "epoch": 0.38, "learning_rate": 3.863895862107255e-06, "logits/chosen": -2.819079637527466, "logits/rejected": -2.8470709323883057, "logps/chosen": -407.00958251953125, "logps/rejected": -451.1863708496094, "loss": 0.6039, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.8009134531021118, "rewards/margins": 0.3192467987537384, "rewards/rejected": -1.1201602220535278, "step": 2940 }, { "epoch": 0.38, "eval_logits/chosen": -2.5733230113983154, "eval_logits/rejected": -2.5929837226867676, "eval_logps/chosen": -424.0008239746094, "eval_logps/rejected": -421.6430969238281, "eval_loss": 0.6243709921836853, "eval_rewards/accuracies": 0.6470000147819519, "eval_rewards/chosen": -0.9125141501426697, "eval_rewards/margins": 0.2928166389465332, "eval_rewards/rejected": -1.2053308486938477, "eval_runtime": 197.0251, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 2940 }, { "epoch": 0.39, "learning_rate": 3.854308930336216e-06, "logits/chosen": -2.753868579864502, "logits/rejected": -2.7251639366149902, "logps/chosen": -478.91741943359375, "logps/rejected": -445.16241455078125, "loss": 0.5952, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.8964638710021973, "rewards/margins": 0.3466527760028839, "rewards/rejected": -1.2431166172027588, "step": 2950 }, { "epoch": 0.39, "eval_logits/chosen": -2.5684099197387695, "eval_logits/rejected": -2.5877881050109863, "eval_logps/chosen": -424.83856201171875, "eval_logps/rejected": -422.58837890625, "eval_loss": 0.6245684027671814, "eval_rewards/accuracies": 0.6445000171661377, "eval_rewards/chosen": -0.9208914041519165, "eval_rewards/margins": 0.29389217495918274, "eval_rewards/rejected": -1.2147835493087769, "eval_runtime": 196.9168, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.078, "step": 2950 }, { "epoch": 0.39, "learning_rate": 3.844693727316151e-06, "logits/chosen": -2.7385358810424805, "logits/rejected": -2.7280914783477783, "logps/chosen": -437.2637634277344, "logps/rejected": -414.7500915527344, "loss": 0.6394, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9794430732727051, "rewards/margins": 0.25574856996536255, "rewards/rejected": -1.2351915836334229, "step": 2960 }, { "epoch": 0.39, "eval_logits/chosen": -2.565891981124878, "eval_logits/rejected": -2.584840774536133, "eval_logps/chosen": -423.7319641113281, "eval_logps/rejected": -421.17254638671875, "eval_loss": 0.6237717270851135, "eval_rewards/accuracies": 0.6474999785423279, "eval_rewards/chosen": -0.9098256826400757, "eval_rewards/margins": 0.2907992899417877, "eval_rewards/rejected": -1.2006248235702515, "eval_runtime": 196.935, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 2960 }, { "epoch": 0.39, "learning_rate": 3.835050453764779e-06, "logits/chosen": -2.671020746231079, "logits/rejected": -2.7046775817871094, "logps/chosen": -383.79461669921875, "logps/rejected": -425.920654296875, "loss": 0.509, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8011902570724487, "rewards/margins": 0.6181550621986389, "rewards/rejected": -1.4193452596664429, "step": 2970 }, { "epoch": 0.39, "eval_logits/chosen": -2.560662269592285, "eval_logits/rejected": -2.579688787460327, "eval_logps/chosen": -426.69012451171875, "eval_logps/rejected": -424.6265869140625, "eval_loss": 0.6250145435333252, "eval_rewards/accuracies": 0.6489999890327454, "eval_rewards/chosen": -0.9394070506095886, "eval_rewards/margins": 0.2957586944103241, "eval_rewards/rejected": -1.2351657152175903, "eval_runtime": 196.9601, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 2970 }, { "epoch": 0.39, "learning_rate": 3.825379310985792e-06, "logits/chosen": -2.7324655055999756, "logits/rejected": -2.7066054344177246, "logps/chosen": -405.8730163574219, "logps/rejected": -424.6570739746094, "loss": 0.6274, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.9839998483657837, "rewards/margins": 0.273231565952301, "rewards/rejected": -1.2572312355041504, "step": 2980 }, { "epoch": 0.39, "eval_logits/chosen": -2.5506222248077393, "eval_logits/rejected": -2.570014715194702, "eval_logps/chosen": -430.77142333984375, "eval_logps/rejected": -429.4747314453125, "eval_loss": 0.6260868310928345, "eval_rewards/accuracies": 0.6495000123977661, "eval_rewards/chosen": -0.9802199602127075, "eval_rewards/margins": 0.3034266531467438, "eval_rewards/rejected": -1.283646583557129, "eval_runtime": 196.9949, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.076, "step": 2980 }, { "epoch": 0.39, "learning_rate": 3.815680500864651e-06, "logits/chosen": -2.7649083137512207, "logits/rejected": -2.783748149871826, "logps/chosen": -464.8194885253906, "logps/rejected": -430.9786071777344, "loss": 0.6132, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.8606418371200562, "rewards/margins": 0.2772321403026581, "rewards/rejected": -1.1378740072250366, "step": 2990 }, { "epoch": 0.39, "eval_logits/chosen": -2.546297073364258, "eval_logits/rejected": -2.566033124923706, "eval_logps/chosen": -433.58160400390625, "eval_logps/rejected": -432.9014587402344, "eval_loss": 0.6257321834564209, "eval_rewards/accuracies": 0.6464999914169312, "eval_rewards/chosen": -1.00832200050354, "eval_rewards/margins": 0.30959272384643555, "eval_rewards/rejected": -1.3179147243499756, "eval_runtime": 196.9553, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 2990 }, { "epoch": 0.39, "learning_rate": 3.80595422586438e-06, "logits/chosen": -2.7633798122406006, "logits/rejected": -2.7644972801208496, "logps/chosen": -490.32781982421875, "logps/rejected": -421.77471923828125, "loss": 0.6322, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.9230520129203796, "rewards/margins": 0.32078996300697327, "rewards/rejected": -1.2438418865203857, "step": 3000 }, { "epoch": 0.39, "eval_logits/chosen": -2.5444023609161377, "eval_logits/rejected": -2.564011335372925, "eval_logps/chosen": -433.4969787597656, "eval_logps/rejected": -432.7070007324219, "eval_loss": 0.6249431371688843, "eval_rewards/accuracies": 0.6504999995231628, "eval_rewards/chosen": -1.0074756145477295, "eval_rewards/margins": 0.30849388241767883, "eval_rewards/rejected": -1.315969467163086, "eval_runtime": 196.9191, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 3000 }, { "epoch": 0.39, "learning_rate": 3.7962006890213266e-06, "logits/chosen": -2.6365857124328613, "logits/rejected": -2.5725252628326416, "logps/chosen": -401.45556640625, "logps/rejected": -403.39056396484375, "loss": 0.6969, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.04611074924469, "rewards/margins": 0.12471544742584229, "rewards/rejected": -1.1708260774612427, "step": 3010 }, { "epoch": 0.39, "eval_logits/chosen": -2.5364789962768555, "eval_logits/rejected": -2.555938482284546, "eval_logps/chosen": -430.24176025390625, "eval_logps/rejected": -428.698486328125, "eval_loss": 0.6241666674613953, "eval_rewards/accuracies": 0.6504999995231628, "eval_rewards/chosen": -0.9749231934547424, "eval_rewards/margins": 0.30096182227134705, "eval_rewards/rejected": -1.275884985923767, "eval_runtime": 196.9515, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 3010 }, { "epoch": 0.4, "learning_rate": 3.7864200939409336e-06, "logits/chosen": -2.708780527114868, "logits/rejected": -2.6882429122924805, "logps/chosen": -422.79168701171875, "logps/rejected": -405.7910461425781, "loss": 0.632, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.8642172813415527, "rewards/margins": 0.2687808573246002, "rewards/rejected": -1.1329978704452515, "step": 3020 }, { "epoch": 0.4, "eval_logits/chosen": -2.543612480163574, "eval_logits/rejected": -2.562998056411743, "eval_logps/chosen": -426.5962219238281, "eval_logps/rejected": -424.1770324707031, "eval_loss": 0.6225207448005676, "eval_rewards/accuracies": 0.6495000123977661, "eval_rewards/chosen": -0.9384684562683105, "eval_rewards/margins": 0.2922017276287079, "eval_rewards/rejected": -1.2306702136993408, "eval_runtime": 197.1525, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 3020 }, { "epoch": 0.4, "learning_rate": 3.7766126447934857e-06, "logits/chosen": -2.721001148223877, "logits/rejected": -2.756192684173584, "logps/chosen": -382.03985595703125, "logps/rejected": -398.6980895996094, "loss": 0.6207, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.9074760675430298, "rewards/margins": 0.2535129487514496, "rewards/rejected": -1.1609890460968018, "step": 3030 }, { "epoch": 0.4, "eval_logits/chosen": -2.54727840423584, "eval_logits/rejected": -2.5665431022644043, "eval_logps/chosen": -426.14080810546875, "eval_logps/rejected": -423.4004211425781, "eval_loss": 0.6219916939735413, "eval_rewards/accuracies": 0.6510000228881836, "eval_rewards/chosen": -0.9339138269424438, "eval_rewards/margins": 0.28898999094963074, "eval_rewards/rejected": -1.2229039669036865, "eval_runtime": 197.0328, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 3030 }, { "epoch": 0.4, "learning_rate": 3.766778546309847e-06, "logits/chosen": -2.783926010131836, "logits/rejected": -2.7826411724090576, "logps/chosen": -457.7347106933594, "logps/rejected": -378.9781188964844, "loss": 0.6059, "rewards/accuracies": 0.6875, "rewards/chosen": -0.8478718996047974, "rewards/margins": 0.3275406062602997, "rewards/rejected": -1.1754125356674194, "step": 3040 }, { "epoch": 0.4, "eval_logits/chosen": -2.5473427772521973, "eval_logits/rejected": -2.5660293102264404, "eval_logps/chosen": -423.0438232421875, "eval_logps/rejected": -419.7774353027344, "eval_loss": 0.6212862730026245, "eval_rewards/accuracies": 0.6554999947547913, "eval_rewards/chosen": -0.9029442071914673, "eval_rewards/margins": 0.2837299108505249, "eval_rewards/rejected": -1.1866742372512817, "eval_runtime": 197.0258, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 3040 }, { "epoch": 0.4, "learning_rate": 3.7569180037771868e-06, "logits/chosen": -2.7684082984924316, "logits/rejected": -2.805574417114258, "logps/chosen": -416.99114990234375, "logps/rejected": -437.90399169921875, "loss": 0.6284, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.9409763216972351, "rewards/margins": 0.28491485118865967, "rewards/rejected": -1.22589111328125, "step": 3050 }, { "epoch": 0.4, "eval_logits/chosen": -2.548220634460449, "eval_logits/rejected": -2.567086696624756, "eval_logps/chosen": -421.3135070800781, "eval_logps/rejected": -417.7491760253906, "eval_loss": 0.621475875377655, "eval_rewards/accuracies": 0.6539999842643738, "eval_rewards/chosen": -0.885640561580658, "eval_rewards/margins": 0.28075098991394043, "eval_rewards/rejected": -1.1663916110992432, "eval_runtime": 197.2358, "eval_samples_per_second": 10.14, "eval_steps_per_second": 5.07, "step": 3050 }, { "epoch": 0.4, "learning_rate": 3.7470312230346955e-06, "logits/chosen": -2.6531074047088623, "logits/rejected": -2.668549060821533, "logps/chosen": -469.8207092285156, "logps/rejected": -427.02337646484375, "loss": 0.5785, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.8234984278678894, "rewards/margins": 0.3786749541759491, "rewards/rejected": -1.2021734714508057, "step": 3060 }, { "epoch": 0.4, "eval_logits/chosen": -2.5429956912994385, "eval_logits/rejected": -2.5619399547576904, "eval_logps/chosen": -421.4232482910156, "eval_logps/rejected": -417.9524230957031, "eval_loss": 0.6219341158866882, "eval_rewards/accuracies": 0.6504999995231628, "eval_rewards/chosen": -0.8867385983467102, "eval_rewards/margins": 0.28168606758117676, "eval_rewards/rejected": -1.1684246063232422, "eval_runtime": 196.8459, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 3060 }, { "epoch": 0.4, "learning_rate": 3.7371184104692857e-06, "logits/chosen": -2.8001978397369385, "logits/rejected": -2.784719944000244, "logps/chosen": -487.9359436035156, "logps/rejected": -442.556396484375, "loss": 0.6048, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.8620021939277649, "rewards/margins": 0.3240812420845032, "rewards/rejected": -1.186083436012268, "step": 3070 }, { "epoch": 0.4, "eval_logits/chosen": -2.540199041366577, "eval_logits/rejected": -2.5592684745788574, "eval_logps/chosen": -427.2795715332031, "eval_logps/rejected": -424.89849853515625, "eval_loss": 0.621972918510437, "eval_rewards/accuracies": 0.6510000228881836, "eval_rewards/chosen": -0.9453017115592957, "eval_rewards/margins": 0.2925828993320465, "eval_rewards/rejected": -1.2378844022750854, "eval_runtime": 196.8413, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 3070 }, { "epoch": 0.4, "learning_rate": 3.727179773011289e-06, "logits/chosen": -2.624542713165283, "logits/rejected": -2.65124773979187, "logps/chosen": -452.1876525878906, "logps/rejected": -441.6802673339844, "loss": 0.6668, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.0772656202316284, "rewards/margins": 0.20561933517456055, "rewards/rejected": -1.2828850746154785, "step": 3080 }, { "epoch": 0.4, "eval_logits/chosen": -2.541574001312256, "eval_logits/rejected": -2.5601866245269775, "eval_logps/chosen": -436.0604248046875, "eval_logps/rejected": -434.9568786621094, "eval_loss": 0.6212599873542786, "eval_rewards/accuracies": 0.6510000228881836, "eval_rewards/chosen": -1.0331101417541504, "eval_rewards/margins": 0.3053584396839142, "eval_rewards/rejected": -1.3384685516357422, "eval_runtime": 196.6065, "eval_samples_per_second": 10.173, "eval_steps_per_second": 5.086, "step": 3080 }, { "epoch": 0.4, "learning_rate": 3.717215518130127e-06, "logits/chosen": -2.5789878368377686, "logits/rejected": -2.554394006729126, "logps/chosen": -426.98602294921875, "logps/rejected": -426.3089294433594, "loss": 0.6687, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.1472880840301514, "rewards/margins": 0.18235152959823608, "rewards/rejected": -1.3296396732330322, "step": 3090 }, { "epoch": 0.4, "eval_logits/chosen": -2.5425422191619873, "eval_logits/rejected": -2.560896873474121, "eval_logps/chosen": -438.49700927734375, "eval_logps/rejected": -437.4830322265625, "eval_loss": 0.620273768901825, "eval_rewards/accuracies": 0.6524999737739563, "eval_rewards/chosen": -1.0574753284454346, "eval_rewards/margins": 0.3062548339366913, "eval_rewards/rejected": -1.3637299537658691, "eval_runtime": 196.7941, "eval_samples_per_second": 10.163, "eval_steps_per_second": 5.081, "step": 3090 }, { "epoch": 0.41, "learning_rate": 3.7072258538299923e-06, "logits/chosen": -2.782985210418701, "logits/rejected": -2.7424604892730713, "logps/chosen": -529.3215942382812, "logps/rejected": -452.83001708984375, "loss": 0.5243, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0583951473236084, "rewards/margins": 0.495781272649765, "rewards/rejected": -1.5541764497756958, "step": 3100 }, { "epoch": 0.41, "eval_logits/chosen": -2.5389695167541504, "eval_logits/rejected": -2.5574052333831787, "eval_logps/chosen": -441.1709289550781, "eval_logps/rejected": -440.71368408203125, "eval_loss": 0.6207247376441956, "eval_rewards/accuracies": 0.6514999866485596, "eval_rewards/chosen": -1.0842152833938599, "eval_rewards/margins": 0.31182152032852173, "eval_rewards/rejected": -1.396036982536316, "eval_runtime": 196.8853, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 3100 }, { "epoch": 0.41, "learning_rate": 3.6972109886454933e-06, "logits/chosen": -2.6880383491516113, "logits/rejected": -2.7003605365753174, "logps/chosen": -433.5580139160156, "logps/rejected": -437.60015869140625, "loss": 0.6149, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1408096551895142, "rewards/margins": 0.37903618812561035, "rewards/rejected": -1.519845962524414, "step": 3110 }, { "epoch": 0.41, "eval_logits/chosen": -2.533323287963867, "eval_logits/rejected": -2.5518412590026855, "eval_logps/chosen": -440.2718811035156, "eval_logps/rejected": -439.7763366699219, "eval_loss": 0.6214109063148499, "eval_rewards/accuracies": 0.6524999737739563, "eval_rewards/chosen": -1.0752249956130981, "eval_rewards/margins": 0.3114384114742279, "eval_rewards/rejected": -1.3866634368896484, "eval_runtime": 197.1731, "eval_samples_per_second": 10.143, "eval_steps_per_second": 5.072, "step": 3110 }, { "epoch": 0.41, "learning_rate": 3.687171131637314e-06, "logits/chosen": -2.551008939743042, "logits/rejected": -2.5964572429656982, "logps/chosen": -450.5162658691406, "logps/rejected": -437.95501708984375, "loss": 0.6346, "rewards/accuracies": 0.625, "rewards/chosen": -1.066068172454834, "rewards/margins": 0.3243589699268341, "rewards/rejected": -1.3904269933700562, "step": 3120 }, { "epoch": 0.41, "eval_logits/chosen": -2.5467216968536377, "eval_logits/rejected": -2.5647366046905518, "eval_logps/chosen": -437.42950439453125, "eval_logps/rejected": -436.2012634277344, "eval_loss": 0.6199201941490173, "eval_rewards/accuracies": 0.6499999761581421, "eval_rewards/chosen": -1.0468007326126099, "eval_rewards/margins": 0.3041113018989563, "eval_rewards/rejected": -1.350912094116211, "eval_runtime": 196.9923, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.076, "step": 3120 }, { "epoch": 0.41, "learning_rate": 3.677106492387839e-06, "logits/chosen": -2.7704672813415527, "logits/rejected": -2.72668719291687, "logps/chosen": -449.903564453125, "logps/rejected": -406.24029541015625, "loss": 0.637, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.063520908355713, "rewards/margins": 0.250002920627594, "rewards/rejected": -1.3135238885879517, "step": 3130 }, { "epoch": 0.41, "eval_logits/chosen": -2.5496647357940674, "eval_logits/rejected": -2.567011833190918, "eval_logps/chosen": -441.9938049316406, "eval_logps/rejected": -440.9230651855469, "eval_loss": 0.6196883320808411, "eval_rewards/accuracies": 0.6495000123977661, "eval_rewards/chosen": -1.0924441814422607, "eval_rewards/margins": 0.3056861162185669, "eval_rewards/rejected": -1.398130178451538, "eval_runtime": 197.0452, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 3130 }, { "epoch": 0.41, "learning_rate": 3.6670172809967865e-06, "logits/chosen": -2.605725049972534, "logits/rejected": -2.5953238010406494, "logps/chosen": -384.080078125, "logps/rejected": -388.01873779296875, "loss": 0.6076, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.2000583410263062, "rewards/margins": 0.28716519474983215, "rewards/rejected": -1.4872233867645264, "step": 3140 }, { "epoch": 0.41, "eval_logits/chosen": -2.545055866241455, "eval_logits/rejected": -2.561589002609253, "eval_logps/chosen": -448.6625671386719, "eval_logps/rejected": -448.001708984375, "eval_loss": 0.620514988899231, "eval_rewards/accuracies": 0.6514999866485596, "eval_rewards/chosen": -1.159131646156311, "eval_rewards/margins": 0.3097854554653168, "eval_rewards/rejected": -1.4689171314239502, "eval_runtime": 196.9221, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 3140 }, { "epoch": 0.41, "learning_rate": 3.6569037080768153e-06, "logits/chosen": -2.7470648288726807, "logits/rejected": -2.7412030696868896, "logps/chosen": -421.9847106933594, "logps/rejected": -458.28314208984375, "loss": 0.5972, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.1772805452346802, "rewards/margins": 0.35452547669410706, "rewards/rejected": -1.5318059921264648, "step": 3150 }, { "epoch": 0.41, "eval_logits/chosen": -2.5433573722839355, "eval_logits/rejected": -2.5591485500335693, "eval_logps/chosen": -454.7508850097656, "eval_logps/rejected": -454.5576477050781, "eval_loss": 0.6221292018890381, "eval_rewards/accuracies": 0.6514999866485596, "eval_rewards/chosen": -1.2200146913528442, "eval_rewards/margins": 0.3144617974758148, "eval_rewards/rejected": -1.534476399421692, "eval_runtime": 196.7531, "eval_samples_per_second": 10.165, "eval_steps_per_second": 5.083, "step": 3150 }, { "epoch": 0.41, "learning_rate": 3.646765984749137e-06, "logits/chosen": -2.7149806022644043, "logits/rejected": -2.761202096939087, "logps/chosen": -453.30419921875, "logps/rejected": -484.2044982910156, "loss": 0.6125, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.237396240234375, "rewards/margins": 0.3510487675666809, "rewards/rejected": -1.5884450674057007, "step": 3160 }, { "epoch": 0.41, "eval_logits/chosen": -2.5344886779785156, "eval_logits/rejected": -2.5501887798309326, "eval_logps/chosen": -454.9515686035156, "eval_logps/rejected": -454.9926452636719, "eval_loss": 0.6223093271255493, "eval_rewards/accuracies": 0.6514999866485596, "eval_rewards/chosen": -1.2220218181610107, "eval_rewards/margins": 0.3168042004108429, "eval_rewards/rejected": -1.5388261079788208, "eval_runtime": 197.0041, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 3160 }, { "epoch": 0.41, "learning_rate": 3.6366043226391e-06, "logits/chosen": -2.574552536010742, "logits/rejected": -2.5905323028564453, "logps/chosen": -460.30157470703125, "logps/rejected": -439.5680236816406, "loss": 0.5862, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1437591314315796, "rewards/margins": 0.36615657806396484, "rewards/rejected": -1.5099157094955444, "step": 3170 }, { "epoch": 0.41, "eval_logits/chosen": -2.5259342193603516, "eval_logits/rejected": -2.541714906692505, "eval_logps/chosen": -454.07659912109375, "eval_logps/rejected": -454.0869140625, "eval_loss": 0.6219574809074402, "eval_rewards/accuracies": 0.6510000228881836, "eval_rewards/chosen": -1.2132717370986938, "eval_rewards/margins": 0.3164973556995392, "eval_rewards/rejected": -1.5297691822052002, "eval_runtime": 196.8455, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 3170 }, { "epoch": 0.42, "learning_rate": 3.6264189338717766e-06, "logits/chosen": -2.816720962524414, "logits/rejected": -2.7663235664367676, "logps/chosen": -458.0995178222656, "logps/rejected": -445.95892333984375, "loss": 0.6327, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.203072190284729, "rewards/margins": 0.29866084456443787, "rewards/rejected": -1.5017330646514893, "step": 3180 }, { "epoch": 0.42, "eval_logits/chosen": -2.5217185020446777, "eval_logits/rejected": -2.538130521774292, "eval_logps/chosen": -442.9092102050781, "eval_logps/rejected": -442.09063720703125, "eval_loss": 0.6193828582763672, "eval_rewards/accuracies": 0.6554999947547913, "eval_rewards/chosen": -1.1015980243682861, "eval_rewards/margins": 0.30820852518081665, "eval_rewards/rejected": -1.409806489944458, "eval_runtime": 196.9243, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 3180 }, { "epoch": 0.42, "learning_rate": 3.6162100310675334e-06, "logits/chosen": -2.7207634449005127, "logits/rejected": -2.713285446166992, "logps/chosen": -447.8941345214844, "logps/rejected": -440.34698486328125, "loss": 0.6965, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -1.179276466369629, "rewards/margins": 0.14491409063339233, "rewards/rejected": -1.324190616607666, "step": 3190 }, { "epoch": 0.42, "eval_logits/chosen": -2.523361921310425, "eval_logits/rejected": -2.5399725437164307, "eval_logps/chosen": -432.1943664550781, "eval_logps/rejected": -430.0140686035156, "eval_loss": 0.6185163259506226, "eval_rewards/accuracies": 0.6524999737739563, "eval_rewards/chosen": -0.9944491982460022, "eval_rewards/margins": 0.2945913076400757, "eval_rewards/rejected": -1.2890405654907227, "eval_runtime": 196.8913, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 3190 }, { "epoch": 0.42, "learning_rate": 3.605977827337596e-06, "logits/chosen": -2.6888821125030518, "logits/rejected": -2.666752338409424, "logps/chosen": -414.6795349121094, "logps/rejected": -418.9081115722656, "loss": 0.6283, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.9674969911575317, "rewards/margins": 0.2940976917743683, "rewards/rejected": -1.261594533920288, "step": 3200 }, { "epoch": 0.42, "eval_logits/chosen": -2.5268585681915283, "eval_logits/rejected": -2.5438408851623535, "eval_logps/chosen": -426.7724304199219, "eval_logps/rejected": -423.63458251953125, "eval_loss": 0.6185536980628967, "eval_rewards/accuracies": 0.6575000286102295, "eval_rewards/chosen": -0.9402304887771606, "eval_rewards/margins": 0.28501537442207336, "eval_rewards/rejected": -1.2252458333969116, "eval_runtime": 196.6028, "eval_samples_per_second": 10.173, "eval_steps_per_second": 5.086, "step": 3200 }, { "epoch": 0.42, "learning_rate": 3.595722536279595e-06, "logits/chosen": -2.791154146194458, "logits/rejected": -2.726059913635254, "logps/chosen": -487.59375, "logps/rejected": -433.30413818359375, "loss": 0.5662, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.8068972826004028, "rewards/margins": 0.4043118357658386, "rewards/rejected": -1.2112090587615967, "step": 3210 }, { "epoch": 0.42, "eval_logits/chosen": -2.523442268371582, "eval_logits/rejected": -2.5411760807037354, "eval_logps/chosen": -419.7383728027344, "eval_logps/rejected": -415.713134765625, "eval_loss": 0.6193069815635681, "eval_rewards/accuracies": 0.6575000286102295, "eval_rewards/chosen": -0.8698898553848267, "eval_rewards/margins": 0.27614113688468933, "eval_rewards/rejected": -1.1460310220718384, "eval_runtime": 197.011, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 3210 }, { "epoch": 0.42, "learning_rate": 3.58544437197311e-06, "logits/chosen": -2.6719181537628174, "logits/rejected": -2.6700119972229004, "logps/chosen": -420.42791748046875, "logps/rejected": -409.15576171875, "loss": 0.6209, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.7765008807182312, "rewards/margins": 0.2826058268547058, "rewards/rejected": -1.059106707572937, "step": 3220 }, { "epoch": 0.42, "eval_logits/chosen": -2.522939443588257, "eval_logits/rejected": -2.541029691696167, "eval_logps/chosen": -415.0108947753906, "eval_logps/rejected": -410.4877014160156, "eval_loss": 0.6199746131896973, "eval_rewards/accuracies": 0.6589999794960022, "eval_rewards/chosen": -0.8226147890090942, "eval_rewards/margins": 0.27116167545318604, "eval_rewards/rejected": -1.0937764644622803, "eval_runtime": 197.4604, "eval_samples_per_second": 10.129, "eval_steps_per_second": 5.064, "step": 3220 }, { "epoch": 0.42, "learning_rate": 3.5751435489752025e-06, "logits/chosen": -2.658782958984375, "logits/rejected": -2.6702182292938232, "logps/chosen": -390.2605895996094, "logps/rejected": -382.9984436035156, "loss": 0.6044, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.7846366763114929, "rewards/margins": 0.31734299659729004, "rewards/rejected": -1.1019797325134277, "step": 3230 }, { "epoch": 0.42, "eval_logits/chosen": -2.519177198410034, "eval_logits/rejected": -2.537327289581299, "eval_logps/chosen": -416.0750732421875, "eval_logps/rejected": -412.0194396972656, "eval_loss": 0.6196084022521973, "eval_rewards/accuracies": 0.6575000286102295, "eval_rewards/chosen": -0.8332566618919373, "eval_rewards/margins": 0.2758375108242035, "eval_rewards/rejected": -1.1090940237045288, "eval_runtime": 196.9088, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.078, "step": 3230 }, { "epoch": 0.42, "learning_rate": 3.5648202823159317e-06, "logits/chosen": -2.649294137954712, "logits/rejected": -2.7054905891418457, "logps/chosen": -371.8926086425781, "logps/rejected": -439.46844482421875, "loss": 0.5752, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7728925943374634, "rewards/margins": 0.3550662100315094, "rewards/rejected": -1.12795889377594, "step": 3240 }, { "epoch": 0.42, "eval_logits/chosen": -2.492098569869995, "eval_logits/rejected": -2.510578155517578, "eval_logps/chosen": -423.4895935058594, "eval_logps/rejected": -421.0146789550781, "eval_loss": 0.6204763054847717, "eval_rewards/accuracies": 0.6549999713897705, "eval_rewards/chosen": -0.9074018001556396, "eval_rewards/margins": 0.29164472222328186, "eval_rewards/rejected": -1.1990464925765991, "eval_runtime": 197.1081, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.073, "step": 3240 }, { "epoch": 0.43, "learning_rate": 3.554474787493873e-06, "logits/chosen": -2.5724246501922607, "logits/rejected": -2.557253360748291, "logps/chosen": -461.4246520996094, "logps/rejected": -445.2750549316406, "loss": 0.624, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9212248921394348, "rewards/margins": 0.33202242851257324, "rewards/rejected": -1.2532472610473633, "step": 3250 }, { "epoch": 0.43, "eval_logits/chosen": -2.46036958694458, "eval_logits/rejected": -2.4790313243865967, "eval_logps/chosen": -429.57958984375, "eval_logps/rejected": -428.1665344238281, "eval_loss": 0.6233686208724976, "eval_rewards/accuracies": 0.6520000100135803, "eval_rewards/chosen": -0.9683018922805786, "eval_rewards/margins": 0.3022630512714386, "eval_rewards/rejected": -1.2705649137496948, "eval_runtime": 196.8966, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 3250 }, { "epoch": 0.43, "learning_rate": 3.5441072804716125e-06, "logits/chosen": -2.6319198608398438, "logits/rejected": -2.6219584941864014, "logps/chosen": -468.0224609375, "logps/rejected": -497.96771240234375, "loss": 0.5992, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.9127866625785828, "rewards/margins": 0.37219464778900146, "rewards/rejected": -1.2849812507629395, "step": 3260 }, { "epoch": 0.43, "eval_logits/chosen": -2.4434595108032227, "eval_logits/rejected": -2.4621036052703857, "eval_logps/chosen": -435.7987976074219, "eval_logps/rejected": -435.3685302734375, "eval_loss": 0.6249234676361084, "eval_rewards/accuracies": 0.6510000228881836, "eval_rewards/chosen": -1.030493974685669, "eval_rewards/margins": 0.3120914101600647, "eval_rewards/rejected": -1.3425853252410889, "eval_runtime": 197.064, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.074, "step": 3260 }, { "epoch": 0.43, "learning_rate": 3.5337179776712427e-06, "logits/chosen": -2.5710291862487793, "logits/rejected": -2.5899360179901123, "logps/chosen": -414.93377685546875, "logps/rejected": -444.3470764160156, "loss": 0.5932, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.9402750134468079, "rewards/margins": 0.4441652297973633, "rewards/rejected": -1.3844401836395264, "step": 3270 }, { "epoch": 0.43, "eval_logits/chosen": -2.4319798946380615, "eval_logits/rejected": -2.450648784637451, "eval_logps/chosen": -440.21142578125, "eval_logps/rejected": -440.3194885253906, "eval_loss": 0.6256486773490906, "eval_rewards/accuracies": 0.6489999890327454, "eval_rewards/chosen": -1.074620246887207, "eval_rewards/margins": 0.3174746036529541, "eval_rewards/rejected": -1.3920949697494507, "eval_runtime": 197.099, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.074, "step": 3270 }, { "epoch": 0.43, "learning_rate": 3.5233070959698445e-06, "logits/chosen": -2.6314568519592285, "logits/rejected": -2.6279854774475098, "logps/chosen": -482.607177734375, "logps/rejected": -463.41656494140625, "loss": 0.6325, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.0531612634658813, "rewards/margins": 0.2862653136253357, "rewards/rejected": -1.3394266366958618, "step": 3280 }, { "epoch": 0.43, "eval_logits/chosen": -2.4338669776916504, "eval_logits/rejected": -2.452253818511963, "eval_logps/chosen": -442.3782043457031, "eval_logps/rejected": -442.60601806640625, "eval_loss": 0.6250951886177063, "eval_rewards/accuracies": 0.6495000123977661, "eval_rewards/chosen": -1.0962878465652466, "eval_rewards/margins": 0.31867215037345886, "eval_rewards/rejected": -1.4149600267410278, "eval_runtime": 197.2018, "eval_samples_per_second": 10.142, "eval_steps_per_second": 5.071, "step": 3280 }, { "epoch": 0.43, "learning_rate": 3.512874852694959e-06, "logits/chosen": -2.5505518913269043, "logits/rejected": -2.5124411582946777, "logps/chosen": -438.30010986328125, "logps/rejected": -424.20489501953125, "loss": 0.6279, "rewards/accuracies": 0.625, "rewards/chosen": -1.0510666370391846, "rewards/margins": 0.2773689329624176, "rewards/rejected": -1.3284354209899902, "step": 3290 }, { "epoch": 0.43, "eval_logits/chosen": -2.4407126903533936, "eval_logits/rejected": -2.458707332611084, "eval_logps/chosen": -442.939697265625, "eval_logps/rejected": -443.0581359863281, "eval_loss": 0.6231091022491455, "eval_rewards/accuracies": 0.6504999995231628, "eval_rewards/chosen": -1.1019030809402466, "eval_rewards/margins": 0.31757813692092896, "eval_rewards/rejected": -1.4194810390472412, "eval_runtime": 197.1085, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.073, "step": 3290 }, { "epoch": 0.43, "learning_rate": 3.5024214656200497e-06, "logits/chosen": -2.583045482635498, "logits/rejected": -2.540410280227661, "logps/chosen": -454.31658935546875, "logps/rejected": -416.6260681152344, "loss": 0.6383, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0384652614593506, "rewards/margins": 0.29643210768699646, "rewards/rejected": -1.3348972797393799, "step": 3300 }, { "epoch": 0.43, "eval_logits/chosen": -2.449859619140625, "eval_logits/rejected": -2.4677672386169434, "eval_logps/chosen": -437.662353515625, "eval_logps/rejected": -436.8260192871094, "eval_loss": 0.6216550469398499, "eval_rewards/accuracies": 0.6545000076293945, "eval_rewards/chosen": -1.0491294860839844, "eval_rewards/margins": 0.30803078413009644, "eval_rewards/rejected": -1.3571603298187256, "eval_runtime": 196.8605, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.08, "step": 3300 }, { "epoch": 0.43, "learning_rate": 3.491947152959958e-06, "logits/chosen": -2.714921474456787, "logits/rejected": -2.687603712081909, "logps/chosen": -469.598388671875, "logps/rejected": -471.0265197753906, "loss": 0.6163, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0226032733917236, "rewards/margins": 0.3151467442512512, "rewards/rejected": -1.33774995803833, "step": 3310 }, { "epoch": 0.43, "eval_logits/chosen": -2.4600846767425537, "eval_logits/rejected": -2.477358102798462, "eval_logps/chosen": -431.9792785644531, "eval_logps/rejected": -430.2066955566406, "eval_loss": 0.6200381517410278, "eval_rewards/accuracies": 0.656000018119812, "eval_rewards/chosen": -0.9922983646392822, "eval_rewards/margins": 0.29866811633110046, "eval_rewards/rejected": -1.290966272354126, "eval_runtime": 196.7653, "eval_samples_per_second": 10.164, "eval_steps_per_second": 5.082, "step": 3310 }, { "epoch": 0.43, "learning_rate": 3.4814521333663497e-06, "logits/chosen": -2.716564178466797, "logits/rejected": -2.686750888824463, "logps/chosen": -496.6659240722656, "logps/rejected": -436.3720703125, "loss": 0.6353, "rewards/accuracies": 0.6875, "rewards/chosen": -1.007828950881958, "rewards/margins": 0.29953330755233765, "rewards/rejected": -1.3073623180389404, "step": 3320 }, { "epoch": 0.43, "eval_logits/chosen": -2.4711954593658447, "eval_logits/rejected": -2.48844051361084, "eval_logps/chosen": -424.7696228027344, "eval_logps/rejected": -421.8024597167969, "eval_loss": 0.6190692186355591, "eval_rewards/accuracies": 0.6570000052452087, "eval_rewards/chosen": -0.9202021360397339, "eval_rewards/margins": 0.28672224283218384, "eval_rewards/rejected": -1.2069244384765625, "eval_runtime": 196.9176, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.078, "step": 3320 }, { "epoch": 0.44, "learning_rate": 3.4709366259231468e-06, "logits/chosen": -2.5870168209075928, "logits/rejected": -2.589010238647461, "logps/chosen": -464.08984375, "logps/rejected": -429.16668701171875, "loss": 0.6372, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.9650132060050964, "rewards/margins": 0.26106229424476624, "rewards/rejected": -1.226075530052185, "step": 3330 }, { "epoch": 0.44, "eval_logits/chosen": -2.474238395690918, "eval_logits/rejected": -2.491607666015625, "eval_logps/chosen": -422.61328125, "eval_logps/rejected": -419.35601806640625, "eval_loss": 0.6182964444160461, "eval_rewards/accuracies": 0.6575000286102295, "eval_rewards/chosen": -0.8986384272575378, "eval_rewards/margins": 0.2838219702243805, "eval_rewards/rejected": -1.1824604272842407, "eval_runtime": 197.1115, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.073, "step": 3330 }, { "epoch": 0.44, "learning_rate": 3.460400850141956e-06, "logits/chosen": -2.6380228996276855, "logits/rejected": -2.552403688430786, "logps/chosen": -395.2093811035156, "logps/rejected": -399.08367919921875, "loss": 0.604, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.9930335283279419, "rewards/margins": 0.3438864052295685, "rewards/rejected": -1.3369200229644775, "step": 3340 }, { "epoch": 0.44, "eval_logits/chosen": -2.4701356887817383, "eval_logits/rejected": -2.487643003463745, "eval_logps/chosen": -425.9539489746094, "eval_logps/rejected": -423.4430236816406, "eval_loss": 0.6179810166358948, "eval_rewards/accuracies": 0.6575000286102295, "eval_rewards/chosen": -0.9320449233055115, "eval_rewards/margins": 0.2912852168083191, "eval_rewards/rejected": -1.2233302593231201, "eval_runtime": 196.7212, "eval_samples_per_second": 10.167, "eval_steps_per_second": 5.083, "step": 3340 }, { "epoch": 0.44, "learning_rate": 3.4498450259574858e-06, "logits/chosen": -2.6182613372802734, "logits/rejected": -2.616330623626709, "logps/chosen": -425.41436767578125, "logps/rejected": -424.77557373046875, "loss": 0.6338, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.9113836288452148, "rewards/margins": 0.2506571114063263, "rewards/rejected": -1.1620408296585083, "step": 3350 }, { "epoch": 0.44, "eval_logits/chosen": -2.476341962814331, "eval_logits/rejected": -2.4934473037719727, "eval_logps/chosen": -433.4237976074219, "eval_logps/rejected": -432.0135803222656, "eval_loss": 0.6166380643844604, "eval_rewards/accuracies": 0.6570000052452087, "eval_rewards/chosen": -1.0067439079284668, "eval_rewards/margins": 0.3022918105125427, "eval_rewards/rejected": -1.3090356588363647, "eval_runtime": 197.0297, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 3350 }, { "epoch": 0.44, "learning_rate": 3.439269373722957e-06, "logits/chosen": -2.5579094886779785, "logits/rejected": -2.568756580352783, "logps/chosen": -428.636962890625, "logps/rejected": -421.09466552734375, "loss": 0.6361, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.061601996421814, "rewards/margins": 0.2855250835418701, "rewards/rejected": -1.3471271991729736, "step": 3360 }, { "epoch": 0.44, "eval_logits/chosen": -2.4759557247161865, "eval_logits/rejected": -2.4932050704956055, "eval_logps/chosen": -434.44158935546875, "eval_logps/rejected": -433.1445007324219, "eval_loss": 0.6161326169967651, "eval_rewards/accuracies": 0.6585000157356262, "eval_rewards/chosen": -1.016922116279602, "eval_rewards/margins": 0.3034227192401886, "eval_rewards/rejected": -1.3203449249267578, "eval_runtime": 197.0594, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 3360 }, { "epoch": 0.44, "learning_rate": 3.4286741142055014e-06, "logits/chosen": -2.6796391010284424, "logits/rejected": -2.6622538566589355, "logps/chosen": -454.41412353515625, "logps/rejected": -435.6158142089844, "loss": 0.6455, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9890263676643372, "rewards/margins": 0.2296716719865799, "rewards/rejected": -1.2186981439590454, "step": 3370 }, { "epoch": 0.44, "eval_logits/chosen": -2.4797682762145996, "eval_logits/rejected": -2.4973316192626953, "eval_logps/chosen": -430.6171875, "eval_logps/rejected": -428.8773193359375, "eval_loss": 0.6161298751831055, "eval_rewards/accuracies": 0.6629999876022339, "eval_rewards/chosen": -0.9786774516105652, "eval_rewards/margins": 0.29899558424949646, "eval_rewards/rejected": -1.2776730060577393, "eval_runtime": 196.995, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.076, "step": 3370 }, { "epoch": 0.44, "learning_rate": 3.4180594685815536e-06, "logits/chosen": -2.670607328414917, "logits/rejected": -2.6860036849975586, "logps/chosen": -394.56951904296875, "logps/rejected": -408.80474853515625, "loss": 0.6137, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0444326400756836, "rewards/margins": 0.3033692240715027, "rewards/rejected": -1.347801923751831, "step": 3380 }, { "epoch": 0.44, "eval_logits/chosen": -2.476987838745117, "eval_logits/rejected": -2.4944095611572266, "eval_logps/chosen": -431.58233642578125, "eval_logps/rejected": -429.9381103515625, "eval_loss": 0.6160823702812195, "eval_rewards/accuracies": 0.6625000238418579, "eval_rewards/chosen": -0.9883295893669128, "eval_rewards/margins": 0.29995113611221313, "eval_rewards/rejected": -1.2882806062698364, "eval_runtime": 196.9616, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 3380 }, { "epoch": 0.44, "learning_rate": 3.4074256584322336e-06, "logits/chosen": -2.5886781215667725, "logits/rejected": -2.577141046524048, "logps/chosen": -398.1769104003906, "logps/rejected": -392.4770202636719, "loss": 0.6181, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9637646675109863, "rewards/margins": 0.29393166303634644, "rewards/rejected": -1.2576963901519775, "step": 3390 }, { "epoch": 0.44, "eval_logits/chosen": -2.4677271842956543, "eval_logits/rejected": -2.484666347503662, "eval_logps/chosen": -433.7590026855469, "eval_logps/rejected": -432.53369140625, "eval_loss": 0.6153517365455627, "eval_rewards/accuracies": 0.6604999899864197, "eval_rewards/chosen": -1.0100959539413452, "eval_rewards/margins": 0.304141104221344, "eval_rewards/rejected": -1.3142372369766235, "eval_runtime": 197.0278, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 3390 }, { "epoch": 0.44, "learning_rate": 3.3967729057387213e-06, "logits/chosen": -2.595198392868042, "logits/rejected": -2.5745913982391357, "logps/chosen": -458.33251953125, "logps/rejected": -429.78466796875, "loss": 0.6161, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9517822265625, "rewards/margins": 0.2866813540458679, "rewards/rejected": -1.2384636402130127, "step": 3400 }, { "epoch": 0.44, "eval_logits/chosen": -2.4600415229797363, "eval_logits/rejected": -2.4762325286865234, "eval_logps/chosen": -438.2619934082031, "eval_logps/rejected": -437.5621337890625, "eval_loss": 0.6144526600837708, "eval_rewards/accuracies": 0.6610000133514404, "eval_rewards/chosen": -1.0551260709762573, "eval_rewards/margins": 0.3093947768211365, "eval_rewards/rejected": -1.3645209074020386, "eval_runtime": 196.8631, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.08, "step": 3400 }, { "epoch": 0.45, "learning_rate": 3.386101432877624e-06, "logits/chosen": -2.6997172832489014, "logits/rejected": -2.6695003509521484, "logps/chosen": -441.0243225097656, "logps/rejected": -419.212158203125, "loss": 0.6071, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.0293080806732178, "rewards/margins": 0.3362095057964325, "rewards/rejected": -1.3655176162719727, "step": 3410 }, { "epoch": 0.45, "eval_logits/chosen": -2.4570953845977783, "eval_logits/rejected": -2.473388671875, "eval_logps/chosen": -440.8621520996094, "eval_logps/rejected": -440.6937561035156, "eval_loss": 0.6143715977668762, "eval_rewards/accuracies": 0.6589999794960022, "eval_rewards/chosen": -1.081127643585205, "eval_rewards/margins": 0.31470969319343567, "eval_rewards/rejected": -1.3958373069763184, "eval_runtime": 196.9586, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 3410 }, { "epoch": 0.45, "learning_rate": 3.375411462616332e-06, "logits/chosen": -2.6679186820983887, "logits/rejected": -2.6668734550476074, "logps/chosen": -458.6727600097656, "logps/rejected": -488.9190979003906, "loss": 0.5929, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.086240291595459, "rewards/margins": 0.3417370915412903, "rewards/rejected": -1.4279773235321045, "step": 3420 }, { "epoch": 0.45, "eval_logits/chosen": -2.4514076709747314, "eval_logits/rejected": -2.4678046703338623, "eval_logps/chosen": -444.2358703613281, "eval_logps/rejected": -444.6484680175781, "eval_loss": 0.6145649552345276, "eval_rewards/accuracies": 0.656499981880188, "eval_rewards/chosen": -1.114864468574524, "eval_rewards/margins": 0.3205198347568512, "eval_rewards/rejected": -1.4353843927383423, "eval_runtime": 196.9793, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 3420 }, { "epoch": 0.45, "learning_rate": 3.3647032181083696e-06, "logits/chosen": -2.7156121730804443, "logits/rejected": -2.707794666290283, "logps/chosen": -506.02716064453125, "logps/rejected": -497.256103515625, "loss": 0.6345, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.1724050045013428, "rewards/margins": 0.2621404528617859, "rewards/rejected": -1.4345453977584839, "step": 3430 }, { "epoch": 0.45, "eval_logits/chosen": -2.4483513832092285, "eval_logits/rejected": -2.464862108230591, "eval_logps/chosen": -444.25457763671875, "eval_logps/rejected": -444.73095703125, "eval_loss": 0.6144143342971802, "eval_rewards/accuracies": 0.659500002861023, "eval_rewards/chosen": -1.1150517463684082, "eval_rewards/margins": 0.32115766406059265, "eval_rewards/rejected": -1.4362094402313232, "eval_runtime": 196.6976, "eval_samples_per_second": 10.168, "eval_steps_per_second": 5.084, "step": 3430 }, { "epoch": 0.45, "learning_rate": 3.3539769228887382e-06, "logits/chosen": -2.6738858222961426, "logits/rejected": -2.6460211277008057, "logps/chosen": -491.38385009765625, "logps/rejected": -500.1153259277344, "loss": 0.5878, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0534520149230957, "rewards/margins": 0.3450348377227783, "rewards/rejected": -1.398486852645874, "step": 3440 }, { "epoch": 0.45, "eval_logits/chosen": -2.4468305110931396, "eval_logits/rejected": -2.463901996612549, "eval_logps/chosen": -441.3050537109375, "eval_logps/rejected": -441.7398986816406, "eval_loss": 0.6141930222511292, "eval_rewards/accuracies": 0.6600000262260437, "eval_rewards/chosen": -1.0855563879013062, "eval_rewards/margins": 0.32074230909347534, "eval_rewards/rejected": -1.4062987565994263, "eval_runtime": 197.0856, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 3440 }, { "epoch": 0.45, "learning_rate": 3.343232800869247e-06, "logits/chosen": -2.6060128211975098, "logits/rejected": -2.615265369415283, "logps/chosen": -398.96343994140625, "logps/rejected": -360.34259033203125, "loss": 0.6214, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0496917963027954, "rewards/margins": 0.25588348507881165, "rewards/rejected": -1.3055751323699951, "step": 3450 }, { "epoch": 0.45, "eval_logits/chosen": -2.4470934867858887, "eval_logits/rejected": -2.4643971920013428, "eval_logps/chosen": -434.67132568359375, "eval_logps/rejected": -434.3101806640625, "eval_loss": 0.6136829853057861, "eval_rewards/accuracies": 0.6604999899864197, "eval_rewards/chosen": -1.019219160079956, "eval_rewards/margins": 0.3127825856208801, "eval_rewards/rejected": -1.3320015668869019, "eval_runtime": 196.7982, "eval_samples_per_second": 10.163, "eval_steps_per_second": 5.081, "step": 3450 }, { "epoch": 0.45, "learning_rate": 3.33247107633384e-06, "logits/chosen": -2.6482961177825928, "logits/rejected": -2.6445212364196777, "logps/chosen": -420.53955078125, "logps/rejected": -450.9517517089844, "loss": 0.5646, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8963683843612671, "rewards/margins": 0.42489439249038696, "rewards/rejected": -1.3212627172470093, "step": 3460 }, { "epoch": 0.45, "eval_logits/chosen": -2.4395406246185303, "eval_logits/rejected": -2.456937551498413, "eval_logps/chosen": -437.60467529296875, "eval_logps/rejected": -437.75543212890625, "eval_loss": 0.6137276887893677, "eval_rewards/accuracies": 0.6589999794960022, "eval_rewards/chosen": -1.0485526323318481, "eval_rewards/margins": 0.3179013133049011, "eval_rewards/rejected": -1.3664538860321045, "eval_runtime": 196.9894, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.076, "step": 3460 }, { "epoch": 0.45, "learning_rate": 3.3216919739339155e-06, "logits/chosen": -2.645444869995117, "logits/rejected": -2.592423677444458, "logps/chosen": -463.4039001464844, "logps/rejected": -436.16583251953125, "loss": 0.5993, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0509432554244995, "rewards/margins": 0.3933911621570587, "rewards/rejected": -1.4443343877792358, "step": 3470 }, { "epoch": 0.45, "eval_logits/chosen": -2.4293198585510254, "eval_logits/rejected": -2.4467997550964355, "eval_logps/chosen": -438.691162109375, "eval_logps/rejected": -439.1224060058594, "eval_loss": 0.6141647696495056, "eval_rewards/accuracies": 0.6614999771118164, "eval_rewards/chosen": -1.0594172477722168, "eval_rewards/margins": 0.3207065761089325, "eval_rewards/rejected": -1.3801236152648926, "eval_runtime": 196.8419, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 3470 }, { "epoch": 0.46, "learning_rate": 3.310895718683635e-06, "logits/chosen": -2.6264524459838867, "logits/rejected": -2.636923313140869, "logps/chosen": -471.2666015625, "logps/rejected": -454.32861328125, "loss": 0.6814, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.0823160409927368, "rewards/margins": 0.1945910006761551, "rewards/rejected": -1.2769070863723755, "step": 3480 }, { "epoch": 0.46, "eval_logits/chosen": -2.4283411502838135, "eval_logits/rejected": -2.4459471702575684, "eval_logps/chosen": -436.0476989746094, "eval_logps/rejected": -436.1694641113281, "eval_loss": 0.6140268445014954, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -1.0329828262329102, "eval_rewards/margins": 0.31761178374290466, "eval_rewards/rejected": -1.3505945205688477, "eval_runtime": 197.0042, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 3480 }, { "epoch": 0.46, "learning_rate": 3.3000825359552256e-06, "logits/chosen": -2.6396970748901367, "logits/rejected": -2.6334455013275146, "logps/chosen": -437.2066955566406, "logps/rejected": -457.81744384765625, "loss": 0.6004, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.9559956789016724, "rewards/margins": 0.3328457176685333, "rewards/rejected": -1.2888413667678833, "step": 3490 }, { "epoch": 0.46, "eval_logits/chosen": -2.431947946548462, "eval_logits/rejected": -2.449997663497925, "eval_logps/chosen": -431.141845703125, "eval_logps/rejected": -430.640625, "eval_loss": 0.6140121221542358, "eval_rewards/accuracies": 0.6644999980926514, "eval_rewards/chosen": -0.9839242100715637, "eval_rewards/margins": 0.31138184666633606, "eval_rewards/rejected": -1.2953060865402222, "eval_runtime": 197.1838, "eval_samples_per_second": 10.143, "eval_steps_per_second": 5.071, "step": 3490 }, { "epoch": 0.46, "learning_rate": 3.2892526514742778e-06, "logits/chosen": -2.6109142303466797, "logits/rejected": -2.5949742794036865, "logps/chosen": -440.78692626953125, "logps/rejected": -423.1656188964844, "loss": 0.6039, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.998335063457489, "rewards/margins": 0.3294587731361389, "rewards/rejected": -1.327793836593628, "step": 3500 }, { "epoch": 0.46, "eval_logits/chosen": -2.4382123947143555, "eval_logits/rejected": -2.4562125205993652, "eval_logps/chosen": -426.1593322753906, "eval_logps/rejected": -425.08843994140625, "eval_loss": 0.6142340302467346, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -0.9340996742248535, "eval_rewards/margins": 0.30568426847457886, "eval_rewards/rejected": -1.2397838830947876, "eval_runtime": 196.947, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.078, "step": 3500 }, { "epoch": 0.46, "learning_rate": 3.27840629131503e-06, "logits/chosen": -2.6633572578430176, "logits/rejected": -2.6355559825897217, "logps/chosen": -450.248291015625, "logps/rejected": -450.91748046875, "loss": 0.5735, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9890987277030945, "rewards/margins": 0.45345035195350647, "rewards/rejected": -1.4425491094589233, "step": 3510 }, { "epoch": 0.46, "eval_logits/chosen": -2.434354305267334, "eval_logits/rejected": -2.4525110721588135, "eval_logps/chosen": -424.9874267578125, "eval_logps/rejected": -423.5655212402344, "eval_loss": 0.614112913608551, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -0.9223799705505371, "eval_rewards/margins": 0.3021751642227173, "eval_rewards/rejected": -1.2245551347732544, "eval_runtime": 196.8662, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.08, "step": 3510 }, { "epoch": 0.46, "learning_rate": 3.2675436818956522e-06, "logits/chosen": -2.647305488586426, "logits/rejected": -2.6159074306488037, "logps/chosen": -401.4864501953125, "logps/rejected": -410.5609436035156, "loss": 0.6345, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.8999361991882324, "rewards/margins": 0.1908596307039261, "rewards/rejected": -1.0907957553863525, "step": 3520 }, { "epoch": 0.46, "eval_logits/chosen": -2.4260454177856445, "eval_logits/rejected": -2.443659782409668, "eval_logps/chosen": -429.8298645019531, "eval_logps/rejected": -428.6860656738281, "eval_loss": 0.6133183836936951, "eval_rewards/accuracies": 0.6629999876022339, "eval_rewards/chosen": -0.9708043932914734, "eval_rewards/margins": 0.30495625734329224, "eval_rewards/rejected": -1.2757607698440552, "eval_runtime": 197.0424, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 3520 }, { "epoch": 0.46, "learning_rate": 3.2566650499735185e-06, "logits/chosen": -2.506486415863037, "logits/rejected": -2.539597988128662, "logps/chosen": -454.28802490234375, "logps/rejected": -455.85968017578125, "loss": 0.5534, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.900974452495575, "rewards/margins": 0.45443105697631836, "rewards/rejected": -1.355405569076538, "step": 3530 }, { "epoch": 0.46, "eval_logits/chosen": -2.4236092567443848, "eval_logits/rejected": -2.4410953521728516, "eval_logps/chosen": -433.484619140625, "eval_logps/rejected": -432.83233642578125, "eval_loss": 0.613182008266449, "eval_rewards/accuracies": 0.6625000238418579, "eval_rewards/chosen": -1.0073524713516235, "eval_rewards/margins": 0.3098709285259247, "eval_rewards/rejected": -1.317223310470581, "eval_runtime": 197.0012, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 3530 }, { "epoch": 0.46, "learning_rate": 3.2457706226404715e-06, "logits/chosen": -2.5730178356170654, "logits/rejected": -2.5727334022521973, "logps/chosen": -440.7850036621094, "logps/rejected": -412.87457275390625, "loss": 0.6593, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.0789464712142944, "rewards/margins": 0.23157748579978943, "rewards/rejected": -1.3105241060256958, "step": 3540 }, { "epoch": 0.46, "eval_logits/chosen": -2.4274179935455322, "eval_logits/rejected": -2.4444172382354736, "eval_logps/chosen": -436.7542724609375, "eval_logps/rejected": -436.4505615234375, "eval_loss": 0.6123189926147461, "eval_rewards/accuracies": 0.6635000109672546, "eval_rewards/chosen": -1.0400488376617432, "eval_rewards/margins": 0.31335678696632385, "eval_rewards/rejected": -1.3534057140350342, "eval_runtime": 196.985, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 3540 }, { "epoch": 0.46, "learning_rate": 3.2348606273180847e-06, "logits/chosen": -2.6839632987976074, "logits/rejected": -2.6603915691375732, "logps/chosen": -475.0283203125, "logps/rejected": -411.526611328125, "loss": 0.5675, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9772094488143921, "rewards/margins": 0.3736717700958252, "rewards/rejected": -1.3508812189102173, "step": 3550 }, { "epoch": 0.46, "eval_logits/chosen": -2.4247100353240967, "eval_logits/rejected": -2.441316604614258, "eval_logps/chosen": -440.3761901855469, "eval_logps/rejected": -440.4826354980469, "eval_loss": 0.6120977401733398, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -1.0762676000595093, "eval_rewards/margins": 0.3174583613872528, "eval_rewards/rejected": -1.3937259912490845, "eval_runtime": 197.3208, "eval_samples_per_second": 10.136, "eval_steps_per_second": 5.068, "step": 3550 }, { "epoch": 0.47, "learning_rate": 3.2239352917529165e-06, "logits/chosen": -2.709627389907837, "logits/rejected": -2.689507246017456, "logps/chosen": -493.081298828125, "logps/rejected": -499.88482666015625, "loss": 0.5771, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.0717947483062744, "rewards/margins": 0.4040806293487549, "rewards/rejected": -1.4758752584457397, "step": 3560 }, { "epoch": 0.47, "eval_logits/chosen": -2.418405055999756, "eval_logits/rejected": -2.4349021911621094, "eval_logps/chosen": -442.52825927734375, "eval_logps/rejected": -443.28399658203125, "eval_loss": 0.61195307970047, "eval_rewards/accuracies": 0.6635000109672546, "eval_rewards/chosen": -1.0977885723114014, "eval_rewards/margins": 0.32395121455192566, "eval_rewards/rejected": -1.4217398166656494, "eval_runtime": 196.8612, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.08, "step": 3560 }, { "epoch": 0.47, "learning_rate": 3.2129948440117487e-06, "logits/chosen": -2.692121744155884, "logits/rejected": -2.6730172634124756, "logps/chosen": -423.61553955078125, "logps/rejected": -441.45556640625, "loss": 0.5887, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.0229573249816895, "rewards/margins": 0.3558308482170105, "rewards/rejected": -1.3787882328033447, "step": 3570 }, { "epoch": 0.47, "eval_logits/chosen": -2.4225990772247314, "eval_logits/rejected": -2.438905715942383, "eval_logps/chosen": -442.7872619628906, "eval_logps/rejected": -443.790283203125, "eval_loss": 0.6118631362915039, "eval_rewards/accuracies": 0.6650000214576721, "eval_rewards/chosen": -1.100378155708313, "eval_rewards/margins": 0.3264242112636566, "eval_rewards/rejected": -1.4268025159835815, "eval_runtime": 196.93, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 3570 }, { "epoch": 0.47, "learning_rate": 3.202039512476833e-06, "logits/chosen": -2.5658066272735596, "logits/rejected": -2.5501656532287598, "logps/chosen": -401.03814697265625, "logps/rejected": -433.0650939941406, "loss": 0.5473, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.0611445903778076, "rewards/margins": 0.4310608506202698, "rewards/rejected": -1.4922053813934326, "step": 3580 }, { "epoch": 0.47, "eval_logits/chosen": -2.418958902359009, "eval_logits/rejected": -2.4356112480163574, "eval_logps/chosen": -442.1664733886719, "eval_logps/rejected": -443.41705322265625, "eval_loss": 0.6123986840248108, "eval_rewards/accuracies": 0.6620000004768372, "eval_rewards/chosen": -1.0941705703735352, "eval_rewards/margins": 0.32889971137046814, "eval_rewards/rejected": -1.4230701923370361, "eval_runtime": 196.9636, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 3580 }, { "epoch": 0.47, "learning_rate": 3.1910695258411216e-06, "logits/chosen": -2.648796319961548, "logits/rejected": -2.595101833343506, "logps/chosen": -427.6651306152344, "logps/rejected": -396.94354248046875, "loss": 0.5784, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.957931637763977, "rewards/margins": 0.39088284969329834, "rewards/rejected": -1.3488144874572754, "step": 3590 }, { "epoch": 0.47, "eval_logits/chosen": -2.427513360977173, "eval_logits/rejected": -2.4447529315948486, "eval_logps/chosen": -439.5167541503906, "eval_logps/rejected": -440.6925354003906, "eval_loss": 0.6133009195327759, "eval_rewards/accuracies": 0.6604999899864197, "eval_rewards/chosen": -1.0676734447479248, "eval_rewards/margins": 0.32815155386924744, "eval_rewards/rejected": -1.395824909210205, "eval_runtime": 196.9972, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 3590 }, { "epoch": 0.47, "learning_rate": 3.1800851131034904e-06, "logits/chosen": -2.6219043731689453, "logits/rejected": -2.624768018722534, "logps/chosen": -436.5489807128906, "logps/rejected": -426.3831481933594, "loss": 0.6345, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.1452642679214478, "rewards/margins": 0.3178446590900421, "rewards/rejected": -1.463108777999878, "step": 3600 }, { "epoch": 0.47, "eval_logits/chosen": -2.4401590824127197, "eval_logits/rejected": -2.4576549530029297, "eval_logps/chosen": -435.9314880371094, "eval_logps/rejected": -436.886962890625, "eval_loss": 0.6135310530662537, "eval_rewards/accuracies": 0.6625000238418579, "eval_rewards/chosen": -1.0318211317062378, "eval_rewards/margins": 0.3259483575820923, "eval_rewards/rejected": -1.35776948928833, "eval_runtime": 196.9714, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 3600 }, { "epoch": 0.47, "learning_rate": 3.169086503563962e-06, "logits/chosen": -2.6728599071502686, "logits/rejected": -2.660001516342163, "logps/chosen": -411.0406188964844, "logps/rejected": -456.24920654296875, "loss": 0.6347, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9420035481452942, "rewards/margins": 0.2699907124042511, "rewards/rejected": -1.2119942903518677, "step": 3610 }, { "epoch": 0.47, "eval_logits/chosen": -2.4506571292877197, "eval_logits/rejected": -2.468738079071045, "eval_logps/chosen": -429.9488220214844, "eval_logps/rejected": -430.1793212890625, "eval_loss": 0.6143242120742798, "eval_rewards/accuracies": 0.6620000004768372, "eval_rewards/chosen": -0.9719939827919006, "eval_rewards/margins": 0.3186990022659302, "eval_rewards/rejected": -1.2906930446624756, "eval_runtime": 197.4584, "eval_samples_per_second": 10.129, "eval_steps_per_second": 5.064, "step": 3610 }, { "epoch": 0.47, "learning_rate": 3.1580739268189165e-06, "logits/chosen": -2.660468578338623, "logits/rejected": -2.6029036045074463, "logps/chosen": -440.058349609375, "logps/rejected": -432.2239685058594, "loss": 0.5758, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9574350118637085, "rewards/margins": 0.4528959393501282, "rewards/rejected": -1.4103310108184814, "step": 3620 }, { "epoch": 0.47, "eval_logits/chosen": -2.45180344581604, "eval_logits/rejected": -2.470142364501953, "eval_logps/chosen": -428.6710205078125, "eval_logps/rejected": -428.8226623535156, "eval_loss": 0.6142221093177795, "eval_rewards/accuracies": 0.6660000085830688, "eval_rewards/chosen": -0.959216296672821, "eval_rewards/margins": 0.3179102838039398, "eval_rewards/rejected": -1.2771265506744385, "eval_runtime": 196.9598, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 3620 }, { "epoch": 0.48, "learning_rate": 3.147047612756302e-06, "logits/chosen": -2.6150004863739014, "logits/rejected": -2.660050630569458, "logps/chosen": -448.8761291503906, "logps/rejected": -477.34722900390625, "loss": 0.5851, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.8641443252563477, "rewards/margins": 0.3695284128189087, "rewards/rejected": -1.233672857284546, "step": 3630 }, { "epoch": 0.48, "eval_logits/chosen": -2.442091226577759, "eval_logits/rejected": -2.460456132888794, "eval_logps/chosen": -430.6168212890625, "eval_logps/rejected": -431.0765686035156, "eval_loss": 0.6144885420799255, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -0.9786739349365234, "eval_rewards/margins": 0.3209916651248932, "eval_rewards/rejected": -1.2996655702590942, "eval_runtime": 196.9581, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 3630 }, { "epoch": 0.48, "learning_rate": 3.136007791550833e-06, "logits/chosen": -2.544302463531494, "logits/rejected": -2.532585859298706, "logps/chosen": -399.3792724609375, "logps/rejected": -384.9727478027344, "loss": 0.5792, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9436852335929871, "rewards/margins": 0.408308744430542, "rewards/rejected": -1.3519941568374634, "step": 3640 }, { "epoch": 0.48, "eval_logits/chosen": -2.4333150386810303, "eval_logits/rejected": -2.451690196990967, "eval_logps/chosen": -435.148193359375, "eval_logps/rejected": -436.144775390625, "eval_loss": 0.6144400238990784, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -1.0239877700805664, "eval_rewards/margins": 0.32635965943336487, "eval_rewards/rejected": -1.3503473997116089, "eval_runtime": 196.9216, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 3640 }, { "epoch": 0.48, "learning_rate": 3.1249546936591848e-06, "logits/chosen": -2.6114816665649414, "logits/rejected": -2.5710997581481934, "logps/chosen": -390.9961242675781, "logps/rejected": -406.75762939453125, "loss": 0.6328, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0057404041290283, "rewards/margins": 0.2645101547241211, "rewards/rejected": -1.2702504396438599, "step": 3650 }, { "epoch": 0.48, "eval_logits/chosen": -2.434093475341797, "eval_logits/rejected": -2.4516608715057373, "eval_logps/chosen": -440.60430908203125, "eval_logps/rejected": -442.5193176269531, "eval_loss": 0.6136277318000793, "eval_rewards/accuracies": 0.6629999876022339, "eval_rewards/chosen": -1.0785490274429321, "eval_rewards/margins": 0.335544228553772, "eval_rewards/rejected": -1.4140933752059937, "eval_runtime": 197.0275, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 3650 }, { "epoch": 0.48, "learning_rate": 3.1138885498151843e-06, "logits/chosen": -2.520498275756836, "logits/rejected": -2.5581088066101074, "logps/chosen": -451.1759338378906, "logps/rejected": -459.38348388671875, "loss": 0.5169, "rewards/accuracies": 0.75, "rewards/chosen": -1.0737239122390747, "rewards/margins": 0.6751200556755066, "rewards/rejected": -1.7488439083099365, "step": 3660 }, { "epoch": 0.48, "eval_logits/chosen": -2.419292449951172, "eval_logits/rejected": -2.4361040592193604, "eval_logps/chosen": -449.6213073730469, "eval_logps/rejected": -452.7323303222656, "eval_loss": 0.6136282682418823, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -1.1687185764312744, "eval_rewards/margins": 0.34750431776046753, "eval_rewards/rejected": -1.5162231922149658, "eval_runtime": 196.9553, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 3660 }, { "epoch": 0.48, "learning_rate": 3.1028095910249937e-06, "logits/chosen": -2.7278361320495605, "logits/rejected": -2.664435863494873, "logps/chosen": -457.93817138671875, "logps/rejected": -419.0298767089844, "loss": 0.5708, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.0790977478027344, "rewards/margins": 0.4191462993621826, "rewards/rejected": -1.498244047164917, "step": 3670 }, { "epoch": 0.48, "eval_logits/chosen": -2.4101860523223877, "eval_logits/rejected": -2.4262466430664062, "eval_logps/chosen": -454.6792297363281, "eval_logps/rejected": -458.39801025390625, "eval_loss": 0.6136077642440796, "eval_rewards/accuracies": 0.6650000214576721, "eval_rewards/chosen": -1.219298005104065, "eval_rewards/margins": 0.3535817563533783, "eval_rewards/rejected": -1.5728797912597656, "eval_runtime": 196.997, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 3670 }, { "epoch": 0.48, "learning_rate": 3.0917180485622895e-06, "logits/chosen": -2.551952362060547, "logits/rejected": -2.5245137214660645, "logps/chosen": -446.6317443847656, "logps/rejected": -429.69891357421875, "loss": 0.6218, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.197405219078064, "rewards/margins": 0.4143539071083069, "rewards/rejected": -1.6117591857910156, "step": 3680 }, { "epoch": 0.48, "eval_logits/chosen": -2.4147117137908936, "eval_logits/rejected": -2.4315760135650635, "eval_logps/chosen": -444.9193115234375, "eval_logps/rejected": -447.4606018066406, "eval_loss": 0.6137916445732117, "eval_rewards/accuracies": 0.6700000166893005, "eval_rewards/chosen": -1.1216992139816284, "eval_rewards/margins": 0.34180694818496704, "eval_rewards/rejected": -1.4635063409805298, "eval_runtime": 197.2281, "eval_samples_per_second": 10.141, "eval_steps_per_second": 5.07, "step": 3680 }, { "epoch": 0.48, "learning_rate": 3.0806141539634294e-06, "logits/chosen": -2.624244213104248, "logits/rejected": -2.615341901779175, "logps/chosen": -415.6836853027344, "logps/rejected": -387.3684997558594, "loss": 0.6159, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.073737382888794, "rewards/margins": 0.3058207631111145, "rewards/rejected": -1.3795579671859741, "step": 3690 }, { "epoch": 0.48, "eval_logits/chosen": -2.4173150062561035, "eval_logits/rejected": -2.434377908706665, "eval_logps/chosen": -437.3270568847656, "eval_logps/rejected": -438.8552551269531, "eval_loss": 0.6134853363037109, "eval_rewards/accuracies": 0.6644999980926514, "eval_rewards/chosen": -1.0457768440246582, "eval_rewards/margins": 0.33167514204978943, "eval_rewards/rejected": -1.37745201587677, "eval_runtime": 196.9768, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 3690 }, { "epoch": 0.48, "learning_rate": 3.069498139022624e-06, "logits/chosen": -2.7119345664978027, "logits/rejected": -2.6447341442108154, "logps/chosen": -443.99627685546875, "logps/rejected": -411.5511779785156, "loss": 0.6424, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.0855329036712646, "rewards/margins": 0.26062411069869995, "rewards/rejected": -1.3461570739746094, "step": 3700 }, { "epoch": 0.48, "eval_logits/chosen": -2.4126713275909424, "eval_logits/rejected": -2.429412364959717, "eval_logps/chosen": -434.31158447265625, "eval_logps/rejected": -435.1674499511719, "eval_loss": 0.6130565404891968, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -1.0156217813491821, "eval_rewards/margins": 0.3249521553516388, "eval_rewards/rejected": -1.3405741453170776, "eval_runtime": 197.1047, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.073, "step": 3700 }, { "epoch": 0.49, "learning_rate": 3.0583702357870964e-06, "logits/chosen": -2.613340139389038, "logits/rejected": -2.623927593231201, "logps/chosen": -476.4661560058594, "logps/rejected": -504.0572204589844, "loss": 0.6458, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0363906621932983, "rewards/margins": 0.25275081396102905, "rewards/rejected": -1.2891414165496826, "step": 3710 }, { "epoch": 0.49, "eval_logits/chosen": -2.4173309803009033, "eval_logits/rejected": -2.4342410564422607, "eval_logps/chosen": -429.7125549316406, "eval_logps/rejected": -429.9491882324219, "eval_loss": 0.6135927438735962, "eval_rewards/accuracies": 0.6664999723434448, "eval_rewards/chosen": -0.9696312546730042, "eval_rewards/margins": 0.31876012682914734, "eval_rewards/rejected": -1.288391351699829, "eval_runtime": 197.0089, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 3710 }, { "epoch": 0.49, "learning_rate": 3.0472306765522393e-06, "logits/chosen": -2.6709144115448, "logits/rejected": -2.689739465713501, "logps/chosen": -409.62322998046875, "logps/rejected": -401.40167236328125, "loss": 0.6061, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9307082295417786, "rewards/margins": 0.3507843315601349, "rewards/rejected": -1.2814924716949463, "step": 3720 }, { "epoch": 0.49, "eval_logits/chosen": -2.427380323410034, "eval_logits/rejected": -2.4437131881713867, "eval_logps/chosen": -426.92364501953125, "eval_logps/rejected": -426.4053955078125, "eval_loss": 0.6134113073348999, "eval_rewards/accuracies": 0.6625000238418579, "eval_rewards/chosen": -0.9417427778244019, "eval_rewards/margins": 0.311210960149765, "eval_rewards/rejected": -1.2529538869857788, "eval_runtime": 196.8867, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 3720 }, { "epoch": 0.49, "learning_rate": 3.0360796938567628e-06, "logits/chosen": -2.6675527095794678, "logits/rejected": -2.625060558319092, "logps/chosen": -424.24627685546875, "logps/rejected": -415.8722229003906, "loss": 0.5655, "rewards/accuracies": 0.625, "rewards/chosen": -0.9266021847724915, "rewards/margins": 0.4293970465660095, "rewards/rejected": -1.355999231338501, "step": 3730 }, { "epoch": 0.49, "eval_logits/chosen": -2.4288480281829834, "eval_logits/rejected": -2.444474697113037, "eval_logps/chosen": -431.9655456542969, "eval_logps/rejected": -432.1391296386719, "eval_loss": 0.612882137298584, "eval_rewards/accuracies": 0.6585000157356262, "eval_rewards/chosen": -0.9921613335609436, "eval_rewards/margins": 0.3181297183036804, "eval_rewards/rejected": -1.310291051864624, "eval_runtime": 196.9777, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 3730 }, { "epoch": 0.49, "learning_rate": 3.0249175204778435e-06, "logits/chosen": -2.667661190032959, "logits/rejected": -2.638627052307129, "logps/chosen": -424.33477783203125, "logps/rejected": -438.136962890625, "loss": 0.5771, "rewards/accuracies": 0.75, "rewards/chosen": -0.9603897333145142, "rewards/margins": 0.4101681113243103, "rewards/rejected": -1.3705580234527588, "step": 3740 }, { "epoch": 0.49, "eval_logits/chosen": -2.425194025039673, "eval_logits/rejected": -2.4405641555786133, "eval_logps/chosen": -436.3723449707031, "eval_logps/rejected": -437.3710021972656, "eval_loss": 0.6123316287994385, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -1.0362294912338257, "eval_rewards/margins": 0.3263804614543915, "eval_rewards/rejected": -1.36260986328125, "eval_runtime": 196.9857, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 3740 }, { "epoch": 0.49, "learning_rate": 3.0137443894262634e-06, "logits/chosen": -2.5059690475463867, "logits/rejected": -2.450510025024414, "logps/chosen": -441.3265686035156, "logps/rejected": -425.53814697265625, "loss": 0.545, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0120147466659546, "rewards/margins": 0.498440682888031, "rewards/rejected": -1.5104554891586304, "step": 3750 }, { "epoch": 0.49, "eval_logits/chosen": -2.417469024658203, "eval_logits/rejected": -2.432871103286743, "eval_logps/chosen": -442.3606262207031, "eval_logps/rejected": -444.4958190917969, "eval_loss": 0.6120953559875488, "eval_rewards/accuracies": 0.6679999828338623, "eval_rewards/chosen": -1.0961120128631592, "eval_rewards/margins": 0.33774587512016296, "eval_rewards/rejected": -1.4338579177856445, "eval_runtime": 197.4301, "eval_samples_per_second": 10.13, "eval_steps_per_second": 5.065, "step": 3750 }, { "epoch": 0.49, "learning_rate": 3.0025605339415476e-06, "logits/chosen": -2.5999059677124023, "logits/rejected": -2.57336163520813, "logps/chosen": -444.1121520996094, "logps/rejected": -437.1282653808594, "loss": 0.5936, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0731585025787354, "rewards/margins": 0.3845066428184509, "rewards/rejected": -1.4576650857925415, "step": 3760 }, { "epoch": 0.49, "eval_logits/chosen": -2.40948486328125, "eval_logits/rejected": -2.424887180328369, "eval_logps/chosen": -446.61859130859375, "eval_logps/rejected": -449.55389404296875, "eval_loss": 0.6122823357582092, "eval_rewards/accuracies": 0.6650000214576721, "eval_rewards/chosen": -1.1386919021606445, "eval_rewards/margins": 0.3457469046115875, "eval_rewards/rejected": -1.4844387769699097, "eval_runtime": 196.8917, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 3760 }, { "epoch": 0.49, "learning_rate": 2.9913661874870923e-06, "logits/chosen": -2.5459322929382324, "logits/rejected": -2.5608432292938232, "logps/chosen": -435.58148193359375, "logps/rejected": -438.0994567871094, "loss": 0.5423, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.134264349937439, "rewards/margins": 0.44381627440452576, "rewards/rejected": -1.5780807733535767, "step": 3770 }, { "epoch": 0.49, "eval_logits/chosen": -2.3963429927825928, "eval_logits/rejected": -2.4115374088287354, "eval_logps/chosen": -453.9317321777344, "eval_logps/rejected": -457.8913269042969, "eval_loss": 0.6133984327316284, "eval_rewards/accuracies": 0.6675000190734863, "eval_rewards/chosen": -1.2118229866027832, "eval_rewards/margins": 0.3559902310371399, "eval_rewards/rejected": -1.5678132772445679, "eval_runtime": 196.9816, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 3770 }, { "epoch": 0.49, "learning_rate": 2.980161583745294e-06, "logits/chosen": -2.5888137817382812, "logits/rejected": -2.574763774871826, "logps/chosen": -495.31396484375, "logps/rejected": -487.2955627441406, "loss": 0.5582, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.2105467319488525, "rewards/margins": 0.5193046927452087, "rewards/rejected": -1.729851484298706, "step": 3780 }, { "epoch": 0.49, "eval_logits/chosen": -2.388474225997925, "eval_logits/rejected": -2.4033782482147217, "eval_logps/chosen": -462.8208312988281, "eval_logps/rejected": -467.9822692871094, "eval_loss": 0.6143119931221008, "eval_rewards/accuracies": 0.6664999723434448, "eval_rewards/chosen": -1.3007144927978516, "eval_rewards/margins": 0.3680078089237213, "eval_rewards/rejected": -1.66872239112854, "eval_runtime": 197.0702, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.074, "step": 3780 }, { "epoch": 0.5, "learning_rate": 2.96894695661267e-06, "logits/chosen": -2.604504346847534, "logits/rejected": -2.552913188934326, "logps/chosen": -500.4588317871094, "logps/rejected": -461.9434509277344, "loss": 0.6335, "rewards/accuracies": 0.6875, "rewards/chosen": -1.3058234453201294, "rewards/margins": 0.26627081632614136, "rewards/rejected": -1.572094202041626, "step": 3790 }, { "epoch": 0.5, "eval_logits/chosen": -2.3936386108398438, "eval_logits/rejected": -2.4086356163024902, "eval_logps/chosen": -459.9853515625, "eval_logps/rejected": -464.7911071777344, "eval_loss": 0.6135148406028748, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -1.2723592519760132, "eval_rewards/margins": 0.364451140165329, "eval_rewards/rejected": -1.636810302734375, "eval_runtime": 196.8777, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.079, "step": 3790 }, { "epoch": 0.5, "learning_rate": 2.9577225401949773e-06, "logits/chosen": -2.5141632556915283, "logits/rejected": -2.5253939628601074, "logps/chosen": -403.39288330078125, "logps/rejected": -421.74432373046875, "loss": 0.6201, "rewards/accuracies": 0.6875, "rewards/chosen": -1.193432092666626, "rewards/margins": 0.3321394920349121, "rewards/rejected": -1.525571346282959, "step": 3800 }, { "epoch": 0.5, "eval_logits/chosen": -2.402308940887451, "eval_logits/rejected": -2.417587995529175, "eval_logps/chosen": -453.1758117675781, "eval_logps/rejected": -457.08599853515625, "eval_loss": 0.6127331256866455, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -1.2042638063430786, "eval_rewards/margins": 0.35549601912498474, "eval_rewards/rejected": -1.5597598552703857, "eval_runtime": 197.0682, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.074, "step": 3800 }, { "epoch": 0.5, "learning_rate": 2.946488568802324e-06, "logits/chosen": -2.5278308391571045, "logits/rejected": -2.468945264816284, "logps/chosen": -459.3369140625, "logps/rejected": -458.6595764160156, "loss": 0.6459, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.274712324142456, "rewards/margins": 0.2448592483997345, "rewards/rejected": -1.5195715427398682, "step": 3810 }, { "epoch": 0.5, "eval_logits/chosen": -2.4084715843200684, "eval_logits/rejected": -2.4238317012786865, "eval_logps/chosen": -447.7943420410156, "eval_logps/rejected": -451.07440185546875, "eval_loss": 0.6117669939994812, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -1.150449514389038, "eval_rewards/margins": 0.34919407963752747, "eval_rewards/rejected": -1.4996436834335327, "eval_runtime": 196.904, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 3810 }, { "epoch": 0.5, "learning_rate": 2.935245276944278e-06, "logits/chosen": -2.5466935634613037, "logits/rejected": -2.574474811553955, "logps/chosen": -471.45513916015625, "logps/rejected": -456.5826110839844, "loss": 0.6382, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0596699714660645, "rewards/margins": 0.280234158039093, "rewards/rejected": -1.3399040699005127, "step": 3820 }, { "epoch": 0.5, "eval_logits/chosen": -2.417717218399048, "eval_logits/rejected": -2.4337222576141357, "eval_logps/chosen": -441.5482177734375, "eval_logps/rejected": -443.9344482421875, "eval_loss": 0.6117742657661438, "eval_rewards/accuracies": 0.6625000238418579, "eval_rewards/chosen": -1.0879883766174316, "eval_rewards/margins": 0.3402560353279114, "eval_rewards/rejected": -1.4282443523406982, "eval_runtime": 197.043, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 3820 }, { "epoch": 0.5, "learning_rate": 2.9239928993249723e-06, "logits/chosen": -2.602570056915283, "logits/rejected": -2.574509382247925, "logps/chosen": -433.97515869140625, "logps/rejected": -443.5970153808594, "loss": 0.5423, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.881142258644104, "rewards/margins": 0.5500217080116272, "rewards/rejected": -1.431164026260376, "step": 3830 }, { "epoch": 0.5, "eval_logits/chosen": -2.4167044162750244, "eval_logits/rejected": -2.433227777481079, "eval_logps/chosen": -440.26531982421875, "eval_logps/rejected": -442.4804382324219, "eval_loss": 0.6125693321228027, "eval_rewards/accuracies": 0.6629999876022339, "eval_rewards/chosen": -1.075158715248108, "eval_rewards/margins": 0.3385455012321472, "eval_rewards/rejected": -1.4137042760849, "eval_runtime": 197.1655, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 3830 }, { "epoch": 0.5, "learning_rate": 2.912731670838207e-06, "logits/chosen": -2.550351858139038, "logits/rejected": -2.545172691345215, "logps/chosen": -422.2438049316406, "logps/rejected": -444.660888671875, "loss": 0.6351, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0034587383270264, "rewards/margins": 0.2920977473258972, "rewards/rejected": -1.295556664466858, "step": 3840 }, { "epoch": 0.5, "eval_logits/chosen": -2.4187636375427246, "eval_logits/rejected": -2.4356865882873535, "eval_logps/chosen": -438.0135498046875, "eval_logps/rejected": -440.0002136230469, "eval_loss": 0.6129617691040039, "eval_rewards/accuracies": 0.6625000238418579, "eval_rewards/chosen": -1.0526416301727295, "eval_rewards/margins": 0.33625999093055725, "eval_rewards/rejected": -1.3889015913009644, "eval_runtime": 196.7803, "eval_samples_per_second": 10.164, "eval_steps_per_second": 5.082, "step": 3840 }, { "epoch": 0.5, "learning_rate": 2.901461826562543e-06, "logits/chosen": -2.6022095680236816, "logits/rejected": -2.608586311340332, "logps/chosen": -382.9307556152344, "logps/rejected": -402.4649353027344, "loss": 0.5856, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0170080661773682, "rewards/margins": 0.3895450234413147, "rewards/rejected": -1.4065531492233276, "step": 3850 }, { "epoch": 0.5, "eval_logits/chosen": -2.4154767990112305, "eval_logits/rejected": -2.4327642917633057, "eval_logps/chosen": -433.6244812011719, "eval_logps/rejected": -435.01007080078125, "eval_loss": 0.6131948232650757, "eval_rewards/accuracies": 0.6614999771118164, "eval_rewards/chosen": -1.0087506771087646, "eval_rewards/margins": 0.3302498161792755, "eval_rewards/rejected": -1.3390004634857178, "eval_runtime": 197.0186, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.076, "step": 3850 }, { "epoch": 0.51, "learning_rate": 2.8901836017563966e-06, "logits/chosen": -2.5830795764923096, "logits/rejected": -2.559356689453125, "logps/chosen": -422.36932373046875, "logps/rejected": -424.05303955078125, "loss": 0.6039, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.953314483165741, "rewards/margins": 0.3424040675163269, "rewards/rejected": -1.2957185506820679, "step": 3860 }, { "epoch": 0.51, "eval_logits/chosen": -2.411612033843994, "eval_logits/rejected": -2.4291622638702393, "eval_logps/chosen": -432.36639404296875, "eval_logps/rejected": -433.6270446777344, "eval_loss": 0.6130424737930298, "eval_rewards/accuracies": 0.6644999980926514, "eval_rewards/chosen": -0.9961698055267334, "eval_rewards/margins": 0.32900041341781616, "eval_rewards/rejected": -1.3251702785491943, "eval_runtime": 196.9225, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 3860 }, { "epoch": 0.51, "learning_rate": 2.8788972318531272e-06, "logits/chosen": -2.541175127029419, "logits/rejected": -2.5342342853546143, "logps/chosen": -417.62567138671875, "logps/rejected": -431.13104248046875, "loss": 0.6142, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.095897912979126, "rewards/margins": 0.3115997314453125, "rewards/rejected": -1.4074976444244385, "step": 3870 }, { "epoch": 0.51, "eval_logits/chosen": -2.402695417404175, "eval_logits/rejected": -2.4197804927825928, "eval_logps/chosen": -436.37451171875, "eval_logps/rejected": -438.23828125, "eval_loss": 0.6130448579788208, "eval_rewards/accuracies": 0.6660000085830688, "eval_rewards/chosen": -1.036251187324524, "eval_rewards/margins": 0.3350312411785126, "eval_rewards/rejected": -1.3712825775146484, "eval_runtime": 197.1842, "eval_samples_per_second": 10.143, "eval_steps_per_second": 5.071, "step": 3870 }, { "epoch": 0.51, "learning_rate": 2.8676029524561255e-06, "logits/chosen": -2.5351319313049316, "logits/rejected": -2.587127447128296, "logps/chosen": -466.9495544433594, "logps/rejected": -477.93048095703125, "loss": 0.6128, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0128754377365112, "rewards/margins": 0.34568047523498535, "rewards/rejected": -1.3585560321807861, "step": 3880 }, { "epoch": 0.51, "eval_logits/chosen": -2.406038284301758, "eval_logits/rejected": -2.422903299331665, "eval_logps/chosen": -438.3699951171875, "eval_logps/rejected": -440.46466064453125, "eval_loss": 0.6125989556312561, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -1.0562056303024292, "eval_rewards/margins": 0.33734050393104553, "eval_rewards/rejected": -1.393546223640442, "eval_runtime": 197.0942, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.074, "step": 3880 }, { "epoch": 0.51, "learning_rate": 2.8563009993338906e-06, "logits/chosen": -2.5570359230041504, "logits/rejected": -2.5582470893859863, "logps/chosen": -413.457275390625, "logps/rejected": -443.176513671875, "loss": 0.5771, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.0789310932159424, "rewards/margins": 0.4563199579715729, "rewards/rejected": -1.5352510213851929, "step": 3890 }, { "epoch": 0.51, "eval_logits/chosen": -2.400045871734619, "eval_logits/rejected": -2.416555643081665, "eval_logps/chosen": -444.1776428222656, "eval_logps/rejected": -447.1835632324219, "eval_loss": 0.6128532290458679, "eval_rewards/accuracies": 0.6629999876022339, "eval_rewards/chosen": -1.1142823696136475, "eval_rewards/margins": 0.3464534878730774, "eval_rewards/rejected": -1.46073579788208, "eval_runtime": 197.0771, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 3890 }, { "epoch": 0.51, "learning_rate": 2.844991608415113e-06, "logits/chosen": -2.6397032737731934, "logits/rejected": -2.6185808181762695, "logps/chosen": -454.1117248535156, "logps/rejected": -481.529541015625, "loss": 0.6089, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.205904245376587, "rewards/margins": 0.36940625309944153, "rewards/rejected": -1.575310468673706, "step": 3900 }, { "epoch": 0.51, "eval_logits/chosen": -2.393319845199585, "eval_logits/rejected": -2.409630537033081, "eval_logps/chosen": -447.9149169921875, "eval_logps/rejected": -451.6171875, "eval_loss": 0.6128678917884827, "eval_rewards/accuracies": 0.659500002861023, "eval_rewards/chosen": -1.1516549587249756, "eval_rewards/margins": 0.35341697931289673, "eval_rewards/rejected": -1.5050721168518066, "eval_runtime": 196.9582, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 3900 }, { "epoch": 0.51, "learning_rate": 2.833675015783746e-06, "logits/chosen": -2.552631378173828, "logits/rejected": -2.571286678314209, "logps/chosen": -406.80224609375, "logps/rejected": -457.42413330078125, "loss": 0.5962, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.2279528379440308, "rewards/margins": 0.39623597264289856, "rewards/rejected": -1.624189019203186, "step": 3910 }, { "epoch": 0.51, "eval_logits/chosen": -2.378218650817871, "eval_logits/rejected": -2.3942618370056152, "eval_logps/chosen": -455.72216796875, "eval_logps/rejected": -460.4048156738281, "eval_loss": 0.6134702563285828, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -1.2297275066375732, "eval_rewards/margins": 0.36322060227394104, "eval_rewards/rejected": -1.5929479598999023, "eval_runtime": 197.1673, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 3910 }, { "epoch": 0.51, "learning_rate": 2.8223514576740784e-06, "logits/chosen": -2.4648399353027344, "logits/rejected": -2.447777509689331, "logps/chosen": -392.42431640625, "logps/rejected": -459.552001953125, "loss": 0.6028, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.0850086212158203, "rewards/margins": 0.3459857106208801, "rewards/rejected": -1.4309942722320557, "step": 3920 }, { "epoch": 0.51, "eval_logits/chosen": -2.3691859245300293, "eval_logits/rejected": -2.3851804733276367, "eval_logps/chosen": -459.0703430175781, "eval_logps/rejected": -464.10882568359375, "eval_loss": 0.6140798330307007, "eval_rewards/accuracies": 0.6625000238418579, "eval_rewards/chosen": -1.263209342956543, "eval_rewards/margins": 0.3667786419391632, "eval_rewards/rejected": -1.6299879550933838, "eval_runtime": 197.0307, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 3920 }, { "epoch": 0.51, "learning_rate": 2.8110211704658073e-06, "logits/chosen": -2.529292106628418, "logits/rejected": -2.50898814201355, "logps/chosen": -500.024169921875, "logps/rejected": -481.23046875, "loss": 0.5829, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2268387079238892, "rewards/margins": 0.4068872034549713, "rewards/rejected": -1.6337261199951172, "step": 3930 }, { "epoch": 0.51, "eval_logits/chosen": -2.362501859664917, "eval_logits/rejected": -2.3783164024353027, "eval_logps/chosen": -459.94146728515625, "eval_logps/rejected": -465.1200256347656, "eval_loss": 0.6143542528152466, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -1.2719205617904663, "eval_rewards/margins": 0.36817923188209534, "eval_rewards/rejected": -1.6400996446609497, "eval_runtime": 196.9832, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 3930 }, { "epoch": 0.52, "learning_rate": 2.7996843906790955e-06, "logits/chosen": -2.480191946029663, "logits/rejected": -2.438917636871338, "logps/chosen": -436.451904296875, "logps/rejected": -451.19195556640625, "loss": 0.6861, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.261759877204895, "rewards/margins": 0.2249007672071457, "rewards/rejected": -1.4866605997085571, "step": 3940 }, { "epoch": 0.52, "eval_logits/chosen": -2.3652889728546143, "eval_logits/rejected": -2.380469560623169, "eval_logps/chosen": -463.49749755859375, "eval_logps/rejected": -469.0307312011719, "eval_loss": 0.6125316619873047, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -1.3074814081192017, "eval_rewards/margins": 0.37172552943229675, "eval_rewards/rejected": -1.6792069673538208, "eval_runtime": 197.3734, "eval_samples_per_second": 10.133, "eval_steps_per_second": 5.067, "step": 3940 }, { "epoch": 0.52, "learning_rate": 2.7883413549696396e-06, "logits/chosen": -2.589012622833252, "logits/rejected": -2.5272421836853027, "logps/chosen": -488.56494140625, "logps/rejected": -494.85833740234375, "loss": 0.538, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.3187625408172607, "rewards/margins": 0.47562170028686523, "rewards/rejected": -1.7943843603134155, "step": 3950 }, { "epoch": 0.52, "eval_logits/chosen": -2.3566412925720215, "eval_logits/rejected": -2.3713998794555664, "eval_logps/chosen": -467.7171630859375, "eval_logps/rejected": -473.5096130371094, "eval_loss": 0.612465500831604, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -1.3496776819229126, "eval_rewards/margins": 0.37431854009628296, "eval_rewards/rejected": -1.7239962816238403, "eval_runtime": 196.8504, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 3950 }, { "epoch": 0.52, "learning_rate": 2.776992300123732e-06, "logits/chosen": -2.451707124710083, "logits/rejected": -2.446232318878174, "logps/chosen": -421.21923828125, "logps/rejected": -454.6561584472656, "loss": 0.6141, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.2354681491851807, "rewards/margins": 0.4316517412662506, "rewards/rejected": -1.6671197414398193, "step": 3960 }, { "epoch": 0.52, "eval_logits/chosen": -2.3530004024505615, "eval_logits/rejected": -2.3678534030914307, "eval_logps/chosen": -468.481201171875, "eval_logps/rejected": -474.32135009765625, "eval_loss": 0.6124312877655029, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -1.3573178052902222, "eval_rewards/margins": 0.3747956454753876, "eval_rewards/rejected": -1.7321133613586426, "eval_runtime": 196.8704, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.079, "step": 3960 }, { "epoch": 0.52, "learning_rate": 2.7656374630533113e-06, "logits/chosen": -2.5897960662841797, "logits/rejected": -2.5861315727233887, "logps/chosen": -422.38079833984375, "logps/rejected": -462.06964111328125, "loss": 0.5655, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.283445119857788, "rewards/margins": 0.44523996114730835, "rewards/rejected": -1.7286850214004517, "step": 3970 }, { "epoch": 0.52, "eval_logits/chosen": -2.3446178436279297, "eval_logits/rejected": -2.359534740447998, "eval_logps/chosen": -471.0897521972656, "eval_logps/rejected": -477.40899658203125, "eval_loss": 0.6137044429779053, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -1.3834036588668823, "eval_rewards/margins": 0.37958598136901855, "eval_rewards/rejected": -1.7629896402359009, "eval_runtime": 196.7457, "eval_samples_per_second": 10.165, "eval_steps_per_second": 5.083, "step": 3970 }, { "epoch": 0.52, "learning_rate": 2.754277080791021e-06, "logits/chosen": -2.482008457183838, "logits/rejected": -2.4874167442321777, "logps/chosen": -466.5902404785156, "logps/rejected": -471.9017639160156, "loss": 0.7222, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.412452220916748, "rewards/margins": 0.17661504447460175, "rewards/rejected": -1.5890672206878662, "step": 3980 }, { "epoch": 0.52, "eval_logits/chosen": -2.3433659076690674, "eval_logits/rejected": -2.3585433959960938, "eval_logps/chosen": -469.8363037109375, "eval_logps/rejected": -476.04425048828125, "eval_loss": 0.6140997409820557, "eval_rewards/accuracies": 0.6644999980926514, "eval_rewards/chosen": -1.3708688020706177, "eval_rewards/margins": 0.37847331166267395, "eval_rewards/rejected": -1.7493420839309692, "eval_runtime": 196.843, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 3980 }, { "epoch": 0.52, "learning_rate": 2.742911390485262e-06, "logits/chosen": -2.4135918617248535, "logits/rejected": -2.4417901039123535, "logps/chosen": -402.02264404296875, "logps/rejected": -404.3270263671875, "loss": 0.6808, "rewards/accuracies": 0.5625, "rewards/chosen": -1.3463059663772583, "rewards/margins": 0.1900065392255783, "rewards/rejected": -1.536312460899353, "step": 3990 }, { "epoch": 0.52, "eval_logits/chosen": -2.34938645362854, "eval_logits/rejected": -2.364652395248413, "eval_logps/chosen": -467.2671203613281, "eval_logps/rejected": -472.96368408203125, "eval_loss": 0.6125109195709229, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -1.345177173614502, "eval_rewards/margins": 0.3733597993850708, "eval_rewards/rejected": -1.7185369729995728, "eval_runtime": 196.8367, "eval_samples_per_second": 10.161, "eval_steps_per_second": 5.08, "step": 3990 }, { "epoch": 0.52, "learning_rate": 2.731540629395239e-06, "logits/chosen": -2.462125778198242, "logits/rejected": -2.4748053550720215, "logps/chosen": -467.29669189453125, "logps/rejected": -465.6487731933594, "loss": 0.6083, "rewards/accuracies": 0.6875, "rewards/chosen": -1.2891066074371338, "rewards/margins": 0.29626819491386414, "rewards/rejected": -1.5853749513626099, "step": 4000 }, { "epoch": 0.52, "eval_logits/chosen": -2.34723162651062, "eval_logits/rejected": -2.3619539737701416, "eval_logps/chosen": -473.87408447265625, "eval_logps/rejected": -480.2319641113281, "eval_loss": 0.6121568083763123, "eval_rewards/accuracies": 0.6660000085830688, "eval_rewards/chosen": -1.4112465381622314, "eval_rewards/margins": 0.3799728453159332, "eval_rewards/rejected": -1.7912193536758423, "eval_runtime": 196.9069, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 4000 }, { "epoch": 0.52, "learning_rate": 2.7201650348860115e-06, "logits/chosen": -2.5356340408325195, "logits/rejected": -2.571254014968872, "logps/chosen": -432.988525390625, "logps/rejected": -411.39306640625, "loss": 0.5894, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.3545644283294678, "rewards/margins": 0.3995421826839447, "rewards/rejected": -1.7541065216064453, "step": 4010 }, { "epoch": 0.52, "eval_logits/chosen": -2.3608150482177734, "eval_logits/rejected": -2.3753154277801514, "eval_logps/chosen": -475.13519287109375, "eval_logps/rejected": -481.7857360839844, "eval_loss": 0.6117491126060486, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -1.4238581657409668, "eval_rewards/margins": 0.3828992545604706, "eval_rewards/rejected": -1.8067574501037598, "eval_runtime": 196.9545, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 4010 }, { "epoch": 0.53, "learning_rate": 2.7087848444235354e-06, "logits/chosen": -2.5912222862243652, "logits/rejected": -2.531287431716919, "logps/chosen": -489.0033264160156, "logps/rejected": -509.7576599121094, "loss": 0.5505, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4281346797943115, "rewards/margins": 0.5264540910720825, "rewards/rejected": -1.9545888900756836, "step": 4020 }, { "epoch": 0.53, "eval_logits/chosen": -2.37618350982666, "eval_logits/rejected": -2.390709638595581, "eval_logps/chosen": -469.84698486328125, "eval_logps/rejected": -475.90283203125, "eval_loss": 0.6106529831886292, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -1.3709757328033447, "eval_rewards/margins": 0.376952588558197, "eval_rewards/rejected": -1.747928500175476, "eval_runtime": 196.9284, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 4020 }, { "epoch": 0.53, "learning_rate": 2.697400295569707e-06, "logits/chosen": -2.601231575012207, "logits/rejected": -2.6253762245178223, "logps/chosen": -414.8094177246094, "logps/rejected": -472.2815856933594, "loss": 0.5603, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.2042758464813232, "rewards/margins": 0.5171712040901184, "rewards/rejected": -1.7214473485946655, "step": 4030 }, { "epoch": 0.53, "eval_logits/chosen": -2.3801462650299072, "eval_logits/rejected": -2.394869565963745, "eval_logps/chosen": -464.7630920410156, "eval_logps/rejected": -470.367919921875, "eval_loss": 0.6102996468544006, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -1.3201372623443604, "eval_rewards/margins": 0.3724416494369507, "eval_rewards/rejected": -1.692578911781311, "eval_runtime": 196.9835, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 4030 }, { "epoch": 0.53, "learning_rate": 2.6860116259774065e-06, "logits/chosen": -2.525394916534424, "logits/rejected": -2.496546983718872, "logps/chosen": -484.4578552246094, "logps/rejected": -508.57562255859375, "loss": 0.5443, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.2538907527923584, "rewards/margins": 0.5189865827560425, "rewards/rejected": -1.7728774547576904, "step": 4040 }, { "epoch": 0.53, "eval_logits/chosen": -2.37362003326416, "eval_logits/rejected": -2.3889076709747314, "eval_logps/chosen": -463.3284912109375, "eval_logps/rejected": -468.995361328125, "eval_loss": 0.6113187074661255, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -1.3057914972305298, "eval_rewards/margins": 0.37306222319602966, "eval_rewards/rejected": -1.6788537502288818, "eval_runtime": 197.1492, "eval_samples_per_second": 10.145, "eval_steps_per_second": 5.072, "step": 4040 }, { "epoch": 0.53, "learning_rate": 2.674619073385531e-06, "logits/chosen": -2.4929561614990234, "logits/rejected": -2.495772361755371, "logps/chosen": -421.23785400390625, "logps/rejected": -454.38140869140625, "loss": 0.602, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.2588123083114624, "rewards/margins": 0.44167566299438477, "rewards/rejected": -1.7004880905151367, "step": 4050 }, { "epoch": 0.53, "eval_logits/chosen": -2.362971544265747, "eval_logits/rejected": -2.37823748588562, "eval_logps/chosen": -467.1461181640625, "eval_logps/rejected": -473.2948913574219, "eval_loss": 0.6117571592330933, "eval_rewards/accuracies": 0.6650000214576721, "eval_rewards/chosen": -1.343967080116272, "eval_rewards/margins": 0.37788188457489014, "eval_rewards/rejected": -1.721848964691162, "eval_runtime": 197.0418, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 4050 }, { "epoch": 0.53, "learning_rate": 2.663222875614038e-06, "logits/chosen": -2.5204296112060547, "logits/rejected": -2.4171836376190186, "logps/chosen": -450.54150390625, "logps/rejected": -466.87615966796875, "loss": 0.6865, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4656872749328613, "rewards/margins": 0.18536174297332764, "rewards/rejected": -1.6510488986968994, "step": 4060 }, { "epoch": 0.53, "eval_logits/chosen": -2.3661386966705322, "eval_logits/rejected": -2.3818247318267822, "eval_logps/chosen": -461.9250793457031, "eval_logps/rejected": -467.6927490234375, "eval_loss": 0.6116368174552917, "eval_rewards/accuracies": 0.6629999876022339, "eval_rewards/chosen": -1.291756510734558, "eval_rewards/margins": 0.3740708827972412, "eval_rewards/rejected": -1.6658276319503784, "eval_runtime": 197.0338, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 4060 }, { "epoch": 0.53, "learning_rate": 2.6518232705589775e-06, "logits/chosen": -2.5525612831115723, "logits/rejected": -2.538083553314209, "logps/chosen": -455.64080810546875, "logps/rejected": -495.1800231933594, "loss": 0.5712, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.2466583251953125, "rewards/margins": 0.4873170852661133, "rewards/rejected": -1.7339754104614258, "step": 4070 }, { "epoch": 0.53, "eval_logits/chosen": -2.3656821250915527, "eval_logits/rejected": -2.3814144134521484, "eval_logps/chosen": -461.1421813964844, "eval_logps/rejected": -467.19329833984375, "eval_loss": 0.6121630072593689, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -1.2839277982711792, "eval_rewards/margins": 0.37690529227256775, "eval_rewards/rejected": -1.6608332395553589, "eval_runtime": 196.9705, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 4070 }, { "epoch": 0.53, "learning_rate": 2.640420496187528e-06, "logits/chosen": -2.457648754119873, "logits/rejected": -2.4747841358184814, "logps/chosen": -490.0325622558594, "logps/rejected": -483.89453125, "loss": 0.5086, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.2399706840515137, "rewards/margins": 0.6194084882736206, "rewards/rejected": -1.8593791723251343, "step": 4080 }, { "epoch": 0.53, "eval_logits/chosen": -2.368699073791504, "eval_logits/rejected": -2.3840346336364746, "eval_logps/chosen": -463.33380126953125, "eval_logps/rejected": -469.78082275390625, "eval_loss": 0.6119689345359802, "eval_rewards/accuracies": 0.6635000109672546, "eval_rewards/chosen": -1.3058441877365112, "eval_rewards/margins": 0.3808634877204895, "eval_rewards/rejected": -1.686707854270935, "eval_runtime": 196.7765, "eval_samples_per_second": 10.164, "eval_steps_per_second": 5.082, "step": 4080 }, { "epoch": 0.54, "learning_rate": 2.629014790533025e-06, "logits/chosen": -2.52437424659729, "logits/rejected": -2.452230930328369, "logps/chosen": -495.469482421875, "logps/rejected": -457.87713623046875, "loss": 0.6036, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.306132197380066, "rewards/margins": 0.4579140543937683, "rewards/rejected": -1.764046311378479, "step": 4090 }, { "epoch": 0.54, "eval_logits/chosen": -2.366572380065918, "eval_logits/rejected": -2.381913185119629, "eval_logps/chosen": -467.9704284667969, "eval_logps/rejected": -475.1983642578125, "eval_loss": 0.6131882071495056, "eval_rewards/accuracies": 0.6669999957084656, "eval_rewards/chosen": -1.3522101640701294, "eval_rewards/margins": 0.3886730372905731, "eval_rewards/rejected": -1.7408833503723145, "eval_runtime": 196.9962, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 4090 }, { "epoch": 0.54, "learning_rate": 2.617606391689996e-06, "logits/chosen": -2.5924911499023438, "logits/rejected": -2.550729274749756, "logps/chosen": -465.5814514160156, "logps/rejected": -473.2737731933594, "loss": 0.6175, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.2862884998321533, "rewards/margins": 0.428173303604126, "rewards/rejected": -1.7144616842269897, "step": 4100 }, { "epoch": 0.54, "eval_logits/chosen": -2.3732011318206787, "eval_logits/rejected": -2.38840651512146, "eval_logps/chosen": -468.1484069824219, "eval_logps/rejected": -475.3802490234375, "eval_loss": 0.6129105091094971, "eval_rewards/accuracies": 0.6629999876022339, "eval_rewards/chosen": -1.3539899587631226, "eval_rewards/margins": 0.3887125849723816, "eval_rewards/rejected": -1.7427024841308594, "eval_runtime": 196.9234, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 4100 }, { "epoch": 0.54, "learning_rate": 2.6061955378091896e-06, "logits/chosen": -2.5106284618377686, "logits/rejected": -2.460104465484619, "logps/chosen": -426.4384765625, "logps/rejected": -476.866455078125, "loss": 0.5335, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.2528026103973389, "rewards/margins": 0.600531816482544, "rewards/rejected": -1.8533344268798828, "step": 4110 }, { "epoch": 0.54, "eval_logits/chosen": -2.374972343444824, "eval_logits/rejected": -2.39029598236084, "eval_logps/chosen": -465.0861511230469, "eval_logps/rejected": -472.0409851074219, "eval_loss": 0.612612247467041, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -1.3233674764633179, "eval_rewards/margins": 0.38594210147857666, "eval_rewards/rejected": -1.709309697151184, "eval_runtime": 196.8488, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 4110 }, { "epoch": 0.54, "learning_rate": 2.5947824670926025e-06, "logits/chosen": -2.5935683250427246, "logits/rejected": -2.5762457847595215, "logps/chosen": -423.15423583984375, "logps/rejected": -490.165771484375, "loss": 0.5439, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.1569029092788696, "rewards/margins": 0.5699300765991211, "rewards/rejected": -1.7268329858779907, "step": 4120 }, { "epoch": 0.54, "eval_logits/chosen": -2.3668456077575684, "eval_logits/rejected": -2.3826231956481934, "eval_logps/chosen": -457.88714599609375, "eval_logps/rejected": -464.2001953125, "eval_loss": 0.6133199334144592, "eval_rewards/accuracies": 0.6660000085830688, "eval_rewards/chosen": -1.251376986503601, "eval_rewards/margins": 0.3795250356197357, "eval_rewards/rejected": -1.6309019327163696, "eval_runtime": 197.1601, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 4120 }, { "epoch": 0.54, "learning_rate": 2.583367417788508e-06, "logits/chosen": -2.451611042022705, "logits/rejected": -2.436627149581909, "logps/chosen": -442.36431884765625, "logps/rejected": -469.06048583984375, "loss": 0.5798, "rewards/accuracies": 0.75, "rewards/chosen": -1.2912431955337524, "rewards/margins": 0.5064207315444946, "rewards/rejected": -1.797663927078247, "step": 4130 }, { "epoch": 0.54, "eval_logits/chosen": -2.359987258911133, "eval_logits/rejected": -2.375581979751587, "eval_logps/chosen": -456.96063232421875, "eval_logps/rejected": -463.28228759765625, "eval_loss": 0.6134931445121765, "eval_rewards/accuracies": 0.6650000214576721, "eval_rewards/chosen": -1.2421122789382935, "eval_rewards/margins": 0.3796096742153168, "eval_rewards/rejected": -1.6217222213745117, "eval_runtime": 196.9855, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 4130 }, { "epoch": 0.54, "learning_rate": 2.5719506281864838e-06, "logits/chosen": -2.603020191192627, "logits/rejected": -2.580487012863159, "logps/chosen": -469.97601318359375, "logps/rejected": -435.74835205078125, "loss": 0.5875, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.1624748706817627, "rewards/margins": 0.44589272141456604, "rewards/rejected": -1.608367681503296, "step": 4140 }, { "epoch": 0.54, "eval_logits/chosen": -2.3628158569335938, "eval_logits/rejected": -2.3783905506134033, "eval_logps/chosen": -457.54718017578125, "eval_logps/rejected": -464.145263671875, "eval_loss": 0.6132175922393799, "eval_rewards/accuracies": 0.6679999828338623, "eval_rewards/chosen": -1.247977614402771, "eval_rewards/margins": 0.3823748826980591, "eval_rewards/rejected": -1.6303523778915405, "eval_runtime": 196.9295, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 4140 }, { "epoch": 0.54, "learning_rate": 2.5605323366124335e-06, "logits/chosen": -2.4823946952819824, "logits/rejected": -2.399623394012451, "logps/chosen": -442.94219970703125, "logps/rejected": -465.85443115234375, "loss": 0.6093, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.2800296545028687, "rewards/margins": 0.39251285791397095, "rewards/rejected": -1.6725425720214844, "step": 4150 }, { "epoch": 0.54, "eval_logits/chosen": -2.375561475753784, "eval_logits/rejected": -2.3912646770477295, "eval_logps/chosen": -453.0314636230469, "eval_logps/rejected": -459.216064453125, "eval_loss": 0.6121273636817932, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -1.2028201818466187, "eval_rewards/margins": 0.3782404065132141, "eval_rewards/rejected": -1.581060528755188, "eval_runtime": 197.1211, "eval_samples_per_second": 10.146, "eval_steps_per_second": 5.073, "step": 4150 }, { "epoch": 0.54, "learning_rate": 2.5491127814236172e-06, "logits/chosen": -2.570061445236206, "logits/rejected": -2.5789883136749268, "logps/chosen": -378.7374572753906, "logps/rejected": -458.75421142578125, "loss": 0.6094, "rewards/accuracies": 0.6875, "rewards/chosen": -1.094857931137085, "rewards/margins": 0.36024436354637146, "rewards/rejected": -1.4551023244857788, "step": 4160 }, { "epoch": 0.54, "eval_logits/chosen": -2.3745861053466797, "eval_logits/rejected": -2.3902618885040283, "eval_logps/chosen": -452.4034423828125, "eval_logps/rejected": -458.7520751953125, "eval_loss": 0.6126303672790527, "eval_rewards/accuracies": 0.6675000190734863, "eval_rewards/chosen": -1.1965399980545044, "eval_rewards/margins": 0.3798801302909851, "eval_rewards/rejected": -1.5764203071594238, "eval_runtime": 197.2083, "eval_samples_per_second": 10.142, "eval_steps_per_second": 5.071, "step": 4160 }, { "epoch": 0.55, "learning_rate": 2.537692201003671e-06, "logits/chosen": -2.538421869277954, "logits/rejected": -2.5713725090026855, "logps/chosen": -450.49005126953125, "logps/rejected": -484.93487548828125, "loss": 0.5578, "rewards/accuracies": 0.75, "rewards/chosen": -1.2483497858047485, "rewards/margins": 0.5216721296310425, "rewards/rejected": -1.7700217962265015, "step": 4170 }, { "epoch": 0.55, "eval_logits/chosen": -2.3649206161499023, "eval_logits/rejected": -2.3805949687957764, "eval_logps/chosen": -451.88079833984375, "eval_logps/rejected": -458.26397705078125, "eval_loss": 0.6134587526321411, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -1.1913139820098877, "eval_rewards/margins": 0.3802258372306824, "eval_rewards/rejected": -1.5715397596359253, "eval_runtime": 196.9874, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.076, "step": 4170 }, { "epoch": 0.55, "learning_rate": 2.526270833757635e-06, "logits/chosen": -2.5782477855682373, "logits/rejected": -2.5254034996032715, "logps/chosen": -440.1346130371094, "logps/rejected": -454.3451232910156, "loss": 0.5732, "rewards/accuracies": 0.6875, "rewards/chosen": -1.1584813594818115, "rewards/margins": 0.5007287859916687, "rewards/rejected": -1.659210205078125, "step": 4180 }, { "epoch": 0.55, "eval_logits/chosen": -2.3597798347473145, "eval_logits/rejected": -2.3748152256011963, "eval_logps/chosen": -454.84210205078125, "eval_logps/rejected": -461.6698913574219, "eval_loss": 0.6135467290878296, "eval_rewards/accuracies": 0.6635000109672546, "eval_rewards/chosen": -1.2209270000457764, "eval_rewards/margins": 0.38467180728912354, "eval_rewards/rejected": -1.6055988073349, "eval_runtime": 196.8673, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.08, "step": 4180 }, { "epoch": 0.55, "learning_rate": 2.514848918106971e-06, "logits/chosen": -2.5071187019348145, "logits/rejected": -2.4454050064086914, "logps/chosen": -454.74652099609375, "logps/rejected": -439.2115173339844, "loss": 0.6302, "rewards/accuracies": 0.625, "rewards/chosen": -1.3234502077102661, "rewards/margins": 0.3678116202354431, "rewards/rejected": -1.691261649131775, "step": 4190 }, { "epoch": 0.55, "eval_logits/chosen": -2.3559532165527344, "eval_logits/rejected": -2.3702216148376465, "eval_logps/chosen": -456.7697448730469, "eval_logps/rejected": -463.5509948730469, "eval_loss": 0.6118788719177246, "eval_rewards/accuracies": 0.6675000190734863, "eval_rewards/chosen": -1.2402034997940063, "eval_rewards/margins": 0.3842066526412964, "eval_rewards/rejected": -1.6244101524353027, "eval_runtime": 196.8886, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 4190 }, { "epoch": 0.55, "learning_rate": 2.503426692484594e-06, "logits/chosen": -2.5244762897491455, "logits/rejected": -2.511427402496338, "logps/chosen": -434.56427001953125, "logps/rejected": -478.21044921875, "loss": 0.5961, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.1945250034332275, "rewards/margins": 0.3992057740688324, "rewards/rejected": -1.5937308073043823, "step": 4200 }, { "epoch": 0.55, "eval_logits/chosen": -2.3472087383270264, "eval_logits/rejected": -2.361002206802368, "eval_logps/chosen": -462.36651611328125, "eval_logps/rejected": -469.5614929199219, "eval_loss": 0.6113600134849548, "eval_rewards/accuracies": 0.6644999980926514, "eval_rewards/chosen": -1.2961714267730713, "eval_rewards/margins": 0.38834336400032043, "eval_rewards/rejected": -1.6845147609710693, "eval_runtime": 196.9479, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 4200 }, { "epoch": 0.55, "learning_rate": 2.492004395329883e-06, "logits/chosen": -2.5484352111816406, "logits/rejected": -2.530270576477051, "logps/chosen": -436.87493896484375, "logps/rejected": -444.59100341796875, "loss": 0.5818, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.1828187704086304, "rewards/margins": 0.43469303846359253, "rewards/rejected": -1.6175119876861572, "step": 4210 }, { "epoch": 0.55, "eval_logits/chosen": -2.3477280139923096, "eval_logits/rejected": -2.3614227771759033, "eval_logps/chosen": -463.8641357421875, "eval_logps/rejected": -471.26556396484375, "eval_loss": 0.6109665632247925, "eval_rewards/accuracies": 0.6660000085830688, "eval_rewards/chosen": -1.3111472129821777, "eval_rewards/margins": 0.39040789008140564, "eval_rewards/rejected": -1.7015551328659058, "eval_runtime": 196.7938, "eval_samples_per_second": 10.163, "eval_steps_per_second": 5.081, "step": 4210 }, { "epoch": 0.55, "learning_rate": 2.4805822650837165e-06, "logits/chosen": -2.426492929458618, "logits/rejected": -2.454468011856079, "logps/chosen": -422.4960021972656, "logps/rejected": -492.2879333496094, "loss": 0.5239, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.2959611415863037, "rewards/margins": 0.6673166751861572, "rewards/rejected": -1.963277816772461, "step": 4220 }, { "epoch": 0.55, "eval_logits/chosen": -2.3370590209960938, "eval_logits/rejected": -2.3499491214752197, "eval_logps/chosen": -471.30816650390625, "eval_logps/rejected": -479.48760986328125, "eval_loss": 0.6105585694313049, "eval_rewards/accuracies": 0.6650000214576721, "eval_rewards/chosen": -1.3855873346328735, "eval_rewards/margins": 0.3981887698173523, "eval_rewards/rejected": -1.7837762832641602, "eval_runtime": 196.8749, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.079, "step": 4220 }, { "epoch": 0.55, "learning_rate": 2.4691605401834843e-06, "logits/chosen": -2.6059975624084473, "logits/rejected": -2.5732944011688232, "logps/chosen": -486.0270080566406, "logps/rejected": -500.5328063964844, "loss": 0.6414, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.333966851234436, "rewards/margins": 0.28036683797836304, "rewards/rejected": -1.6143337488174438, "step": 4230 }, { "epoch": 0.55, "eval_logits/chosen": -2.332699775695801, "eval_logits/rejected": -2.3459360599517822, "eval_logps/chosen": -468.0067443847656, "eval_logps/rejected": -475.77203369140625, "eval_loss": 0.6105542778968811, "eval_rewards/accuracies": 0.6625000238418579, "eval_rewards/chosen": -1.3525731563568115, "eval_rewards/margins": 0.3940469026565552, "eval_rewards/rejected": -1.7466199398040771, "eval_runtime": 196.7934, "eval_samples_per_second": 10.163, "eval_steps_per_second": 5.081, "step": 4230 }, { "epoch": 0.55, "learning_rate": 2.457739459058117e-06, "logits/chosen": -2.6030757427215576, "logits/rejected": -2.584155559539795, "logps/chosen": -513.5277099609375, "logps/rejected": -507.9750061035156, "loss": 0.5823, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2995240688323975, "rewards/margins": 0.44727516174316406, "rewards/rejected": -1.746799111366272, "step": 4240 }, { "epoch": 0.55, "eval_logits/chosen": -2.3276073932647705, "eval_logits/rejected": -2.3409342765808105, "eval_logps/chosen": -466.8489990234375, "eval_logps/rejected": -474.26348876953125, "eval_loss": 0.6102898716926575, "eval_rewards/accuracies": 0.6664999723434448, "eval_rewards/chosen": -1.3409960269927979, "eval_rewards/margins": 0.3905387222766876, "eval_rewards/rejected": -1.7315348386764526, "eval_runtime": 196.9447, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.078, "step": 4240 }, { "epoch": 0.56, "learning_rate": 2.4463192601231054e-06, "logits/chosen": -2.527188539505005, "logits/rejected": -2.4350686073303223, "logps/chosen": -512.815673828125, "logps/rejected": -483.7102966308594, "loss": 0.5697, "rewards/accuracies": 0.6875, "rewards/chosen": -1.3667986392974854, "rewards/margins": 0.5209914445877075, "rewards/rejected": -1.8877900838851929, "step": 4250 }, { "epoch": 0.56, "eval_logits/chosen": -2.327854633331299, "eval_logits/rejected": -2.3410706520080566, "eval_logps/chosen": -465.3106689453125, "eval_logps/rejected": -472.47796630859375, "eval_loss": 0.6097335815429688, "eval_rewards/accuracies": 0.6629999876022339, "eval_rewards/chosen": -1.3256126642227173, "eval_rewards/margins": 0.3880668580532074, "eval_rewards/rejected": -1.713679313659668, "eval_runtime": 197.0119, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 4250 }, { "epoch": 0.56, "learning_rate": 2.434900181775524e-06, "logits/chosen": -2.5026462078094482, "logits/rejected": -2.5014119148254395, "logps/chosen": -471.37548828125, "logps/rejected": -479.77020263671875, "loss": 0.6178, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.356858491897583, "rewards/margins": 0.3824175000190735, "rewards/rejected": -1.7392759323120117, "step": 4260 }, { "epoch": 0.56, "eval_logits/chosen": -2.336132049560547, "eval_logits/rejected": -2.3492181301116943, "eval_logps/chosen": -464.2100524902344, "eval_logps/rejected": -471.1261901855469, "eval_loss": 0.6091320514678955, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -1.3146066665649414, "eval_rewards/margins": 0.38555505871772766, "eval_rewards/rejected": -1.7001614570617676, "eval_runtime": 196.757, "eval_samples_per_second": 10.165, "eval_steps_per_second": 5.082, "step": 4260 }, { "epoch": 0.56, "learning_rate": 2.4234824623890578e-06, "logits/chosen": -2.617096424102783, "logits/rejected": -2.5573208332061768, "logps/chosen": -455.67352294921875, "logps/rejected": -475.95867919921875, "loss": 0.5538, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3118655681610107, "rewards/margins": 0.5087541341781616, "rewards/rejected": -1.820619821548462, "step": 4270 }, { "epoch": 0.56, "eval_logits/chosen": -2.335390329360962, "eval_logits/rejected": -2.348327398300171, "eval_logps/chosen": -464.7738952636719, "eval_logps/rejected": -471.7409973144531, "eval_loss": 0.6090496778488159, "eval_rewards/accuracies": 0.6644999980926514, "eval_rewards/chosen": -1.3202449083328247, "eval_rewards/margins": 0.38606494665145874, "eval_rewards/rejected": -1.7063097953796387, "eval_runtime": 196.9399, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.078, "step": 4270 }, { "epoch": 0.56, "learning_rate": 2.4120663403090193e-06, "logits/chosen": -2.5204405784606934, "logits/rejected": -2.515784502029419, "logps/chosen": -462.69903564453125, "logps/rejected": -501.7197265625, "loss": 0.5863, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3065764904022217, "rewards/margins": 0.42397910356521606, "rewards/rejected": -1.730555772781372, "step": 4280 }, { "epoch": 0.56, "eval_logits/chosen": -2.332894802093506, "eval_logits/rejected": -2.3454771041870117, "eval_logps/chosen": -468.6414489746094, "eval_logps/rejected": -476.00213623046875, "eval_loss": 0.6092647910118103, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -1.3589202165603638, "eval_rewards/margins": 0.39000067114830017, "eval_rewards/rejected": -1.7489211559295654, "eval_runtime": 197.027, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 4280 }, { "epoch": 0.56, "learning_rate": 2.40065205384738e-06, "logits/chosen": -2.482933282852173, "logits/rejected": -2.405017852783203, "logps/chosen": -444.735107421875, "logps/rejected": -423.2681579589844, "loss": 0.7136, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -1.447666049003601, "rewards/margins": 0.18110871315002441, "rewards/rejected": -1.628774881362915, "step": 4290 }, { "epoch": 0.56, "eval_logits/chosen": -2.337977647781372, "eval_logits/rejected": -2.350689172744751, "eval_logps/chosen": -465.7172546386719, "eval_logps/rejected": -472.37890625, "eval_loss": 0.6085383296012878, "eval_rewards/accuracies": 0.6650000214576721, "eval_rewards/chosen": -1.3296782970428467, "eval_rewards/margins": 0.38301026821136475, "eval_rewards/rejected": -1.712688684463501, "eval_runtime": 197.1026, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.073, "step": 4290 }, { "epoch": 0.56, "learning_rate": 2.389239841277793e-06, "logits/chosen": -2.367617130279541, "logits/rejected": -2.3953096866607666, "logps/chosen": -449.0538024902344, "logps/rejected": -443.99176025390625, "loss": 0.5972, "rewards/accuracies": 0.625, "rewards/chosen": -1.3654316663742065, "rewards/margins": 0.3840712308883667, "rewards/rejected": -1.7495027780532837, "step": 4300 }, { "epoch": 0.56, "eval_logits/chosen": -2.348155975341797, "eval_logits/rejected": -2.3609445095062256, "eval_logps/chosen": -463.5664367675781, "eval_logps/rejected": -469.9287109375, "eval_loss": 0.6079076528549194, "eval_rewards/accuracies": 0.6644999980926514, "eval_rewards/chosen": -1.3081703186035156, "eval_rewards/margins": 0.3800167143344879, "eval_rewards/rejected": -1.6881871223449707, "eval_runtime": 196.9503, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 4300 }, { "epoch": 0.56, "learning_rate": 2.3778299408306167e-06, "logits/chosen": -2.5109307765960693, "logits/rejected": -2.4798407554626465, "logps/chosen": -425.0166931152344, "logps/rejected": -450.65692138671875, "loss": 0.5835, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.210313081741333, "rewards/margins": 0.47474008798599243, "rewards/rejected": -1.6850531101226807, "step": 4310 }, { "epoch": 0.56, "eval_logits/chosen": -2.3537330627441406, "eval_logits/rejected": -2.3664982318878174, "eval_logps/chosen": -462.9638366699219, "eval_logps/rejected": -469.1507873535156, "eval_loss": 0.6074733734130859, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -1.3021445274353027, "eval_rewards/margins": 0.37826311588287354, "eval_rewards/rejected": -1.6804077625274658, "eval_runtime": 197.2587, "eval_samples_per_second": 10.139, "eval_steps_per_second": 5.069, "step": 4310 }, { "epoch": 0.57, "learning_rate": 2.3664225906879452e-06, "logits/chosen": -2.504697561264038, "logits/rejected": -2.5029256343841553, "logps/chosen": -428.6754455566406, "logps/rejected": -426.82843017578125, "loss": 0.6176, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.2406651973724365, "rewards/margins": 0.34046998620033264, "rewards/rejected": -1.5811351537704468, "step": 4320 }, { "epoch": 0.57, "eval_logits/chosen": -2.3593673706054688, "eval_logits/rejected": -2.3729419708251953, "eval_logps/chosen": -456.5874328613281, "eval_logps/rejected": -462.12481689453125, "eval_loss": 0.607283353805542, "eval_rewards/accuracies": 0.6610000133514404, "eval_rewards/chosen": -1.2383801937103271, "eval_rewards/margins": 0.3717676103115082, "eval_rewards/rejected": -1.6101479530334473, "eval_runtime": 197.0978, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.074, "step": 4320 }, { "epoch": 0.57, "learning_rate": 2.3550180289786357e-06, "logits/chosen": -2.5368552207946777, "logits/rejected": -2.469285488128662, "logps/chosen": -431.1910705566406, "logps/rejected": -420.4564514160156, "loss": 0.5657, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.0879895687103271, "rewards/margins": 0.4566231369972229, "rewards/rejected": -1.5446126461029053, "step": 4330 }, { "epoch": 0.57, "eval_logits/chosen": -2.364882230758667, "eval_logits/rejected": -2.378333806991577, "eval_logps/chosen": -456.3202819824219, "eval_logps/rejected": -461.741943359375, "eval_loss": 0.6068199276924133, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -1.235708475112915, "eval_rewards/margins": 0.3706108033657074, "eval_rewards/rejected": -1.6063191890716553, "eval_runtime": 197.2059, "eval_samples_per_second": 10.142, "eval_steps_per_second": 5.071, "step": 4330 }, { "epoch": 0.57, "learning_rate": 2.343616493773335e-06, "logits/chosen": -2.6210741996765137, "logits/rejected": -2.5647199153900146, "logps/chosen": -448.553466796875, "logps/rejected": -487.02490234375, "loss": 0.5632, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2043299674987793, "rewards/margins": 0.42456427216529846, "rewards/rejected": -1.6288942098617554, "step": 4340 }, { "epoch": 0.57, "eval_logits/chosen": -2.36385440826416, "eval_logits/rejected": -2.3777124881744385, "eval_logps/chosen": -456.9960021972656, "eval_logps/rejected": -462.7846374511719, "eval_loss": 0.6074703335762024, "eval_rewards/accuracies": 0.6629999876022339, "eval_rewards/chosen": -1.242465853691101, "eval_rewards/margins": 0.37428027391433716, "eval_rewards/rejected": -1.6167460680007935, "eval_runtime": 196.9677, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 4340 }, { "epoch": 0.57, "learning_rate": 2.3322182230795127e-06, "logits/chosen": -2.5477375984191895, "logits/rejected": -2.5292723178863525, "logps/chosen": -395.3967590332031, "logps/rejected": -476.68109130859375, "loss": 0.5542, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.1301119327545166, "rewards/margins": 0.5130189657211304, "rewards/rejected": -1.6431306600570679, "step": 4350 }, { "epoch": 0.57, "eval_logits/chosen": -2.362542152404785, "eval_logits/rejected": -2.3769373893737793, "eval_logps/chosen": -454.8631896972656, "eval_logps/rejected": -460.7909851074219, "eval_loss": 0.6081883311271667, "eval_rewards/accuracies": 0.6610000133514404, "eval_rewards/chosen": -1.2211376428604126, "eval_rewards/margins": 0.3756721317768097, "eval_rewards/rejected": -1.5968098640441895, "eval_runtime": 196.9785, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 4350 }, { "epoch": 0.57, "learning_rate": 2.320823454836491e-06, "logits/chosen": -2.7069315910339355, "logits/rejected": -2.598485231399536, "logps/chosen": -436.9664001464844, "logps/rejected": -443.3999938964844, "loss": 0.5563, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1292293071746826, "rewards/margins": 0.44567570090293884, "rewards/rejected": -1.5749050378799438, "step": 4360 }, { "epoch": 0.57, "eval_logits/chosen": -2.362305164337158, "eval_logits/rejected": -2.377182722091675, "eval_logps/chosen": -453.9595031738281, "eval_logps/rejected": -460.05413818359375, "eval_loss": 0.6087186336517334, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -1.2121007442474365, "eval_rewards/margins": 0.3773403763771057, "eval_rewards/rejected": -1.589441180229187, "eval_runtime": 197.1092, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.073, "step": 4360 }, { "epoch": 0.57, "learning_rate": 2.309432426910478e-06, "logits/chosen": -2.4575705528259277, "logits/rejected": -2.4372870922088623, "logps/chosen": -483.6983337402344, "logps/rejected": -443.008056640625, "loss": 0.6174, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1192137002944946, "rewards/margins": 0.37381118535995483, "rewards/rejected": -1.4930248260498047, "step": 4370 }, { "epoch": 0.57, "eval_logits/chosen": -2.3570568561553955, "eval_logits/rejected": -2.371819257736206, "eval_logps/chosen": -455.49847412109375, "eval_logps/rejected": -461.7596740722656, "eval_loss": 0.6090093851089478, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -1.227491021156311, "eval_rewards/margins": 0.379006028175354, "eval_rewards/rejected": -1.606496810913086, "eval_runtime": 197.0527, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 4370 }, { "epoch": 0.57, "learning_rate": 2.298045377089604e-06, "logits/chosen": -2.5362887382507324, "logits/rejected": -2.52489972114563, "logps/chosen": -435.7310485839844, "logps/rejected": -447.492919921875, "loss": 0.5537, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2247138023376465, "rewards/margins": 0.4836392402648926, "rewards/rejected": -1.708353042602539, "step": 4380 }, { "epoch": 0.57, "eval_logits/chosen": -2.35113787651062, "eval_logits/rejected": -2.365795612335205, "eval_logps/chosen": -460.2951965332031, "eval_logps/rejected": -467.2223815917969, "eval_loss": 0.6088528037071228, "eval_rewards/accuracies": 0.6629999876022339, "eval_rewards/chosen": -1.2754576206207275, "eval_rewards/margins": 0.3856658637523651, "eval_rewards/rejected": -1.661123514175415, "eval_runtime": 196.9929, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.076, "step": 4380 }, { "epoch": 0.57, "learning_rate": 2.286662543078955e-06, "logits/chosen": -2.4176924228668213, "logits/rejected": -2.4342312812805176, "logps/chosen": -475.22503662109375, "logps/rejected": -464.350830078125, "loss": 0.5696, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.2765443325042725, "rewards/margins": 0.389670729637146, "rewards/rejected": -1.666215181350708, "step": 4390 }, { "epoch": 0.57, "eval_logits/chosen": -2.35123348236084, "eval_logits/rejected": -2.3659682273864746, "eval_logps/chosen": -462.67279052734375, "eval_logps/rejected": -469.9460144042969, "eval_loss": 0.6087071299552917, "eval_rewards/accuracies": 0.6629999876022339, "eval_rewards/chosen": -1.29923415184021, "eval_rewards/margins": 0.38912561535835266, "eval_rewards/rejected": -1.6883596181869507, "eval_runtime": 196.9775, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 4390 }, { "epoch": 0.58, "learning_rate": 2.2752841624956125e-06, "logits/chosen": -2.636507034301758, "logits/rejected": -2.518415689468384, "logps/chosen": -503.35247802734375, "logps/rejected": -511.513427734375, "loss": 0.6052, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.3775184154510498, "rewards/margins": 0.47160688042640686, "rewards/rejected": -1.8491252660751343, "step": 4400 }, { "epoch": 0.58, "eval_logits/chosen": -2.35198974609375, "eval_logits/rejected": -2.3669545650482178, "eval_logps/chosen": -461.1800537109375, "eval_logps/rejected": -468.2998352050781, "eval_loss": 0.6087808012962341, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -1.2843064069747925, "eval_rewards/margins": 0.3875918388366699, "eval_rewards/rejected": -1.671898365020752, "eval_runtime": 197.0059, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 4400 }, { "epoch": 0.58, "learning_rate": 2.2639104728636915e-06, "logits/chosen": -2.5947508811950684, "logits/rejected": -2.58724308013916, "logps/chosen": -426.2372131347656, "logps/rejected": -467.16937255859375, "loss": 0.5886, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1018826961517334, "rewards/margins": 0.4221973419189453, "rewards/rejected": -1.5240800380706787, "step": 4410 }, { "epoch": 0.58, "eval_logits/chosen": -2.347571849822998, "eval_logits/rejected": -2.3628687858581543, "eval_logps/chosen": -457.931396484375, "eval_logps/rejected": -464.91552734375, "eval_loss": 0.6095851063728333, "eval_rewards/accuracies": 0.6679999828338623, "eval_rewards/chosen": -1.2518198490142822, "eval_rewards/margins": 0.38623523712158203, "eval_rewards/rejected": -1.6380552053451538, "eval_runtime": 197.0362, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 4410 }, { "epoch": 0.58, "learning_rate": 2.252541711609384e-06, "logits/chosen": -2.551729679107666, "logits/rejected": -2.4922897815704346, "logps/chosen": -436.5389099121094, "logps/rejected": -428.7633361816406, "loss": 0.586, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1860450506210327, "rewards/margins": 0.4158903956413269, "rewards/rejected": -1.601935625076294, "step": 4420 }, { "epoch": 0.58, "eval_logits/chosen": -2.346620559692383, "eval_logits/rejected": -2.3620049953460693, "eval_logps/chosen": -454.94219970703125, "eval_logps/rejected": -461.5989074707031, "eval_loss": 0.609160840511322, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -1.2219277620315552, "eval_rewards/margins": 0.3829614222049713, "eval_rewards/rejected": -1.604889154434204, "eval_runtime": 197.0636, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 4420 }, { "epoch": 0.58, "learning_rate": 2.241178116056002e-06, "logits/chosen": -2.5624594688415527, "logits/rejected": -2.5428500175476074, "logps/chosen": -426.37109375, "logps/rejected": -437.63995361328125, "loss": 0.5653, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.156144618988037, "rewards/margins": 0.45035356283187866, "rewards/rejected": -1.60649836063385, "step": 4430 }, { "epoch": 0.58, "eval_logits/chosen": -2.349269390106201, "eval_logits/rejected": -2.364637613296509, "eval_logps/chosen": -454.94842529296875, "eval_logps/rejected": -461.6927490234375, "eval_loss": 0.6091820597648621, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -1.2219903469085693, "eval_rewards/margins": 0.3838370144367218, "eval_rewards/rejected": -1.6058274507522583, "eval_runtime": 196.9566, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 4430 }, { "epoch": 0.58, "learning_rate": 2.2298199234190236e-06, "logits/chosen": -2.4795172214508057, "logits/rejected": -2.5077686309814453, "logps/chosen": -477.9178771972656, "logps/rejected": -481.4457092285156, "loss": 0.5427, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.2306759357452393, "rewards/margins": 0.5230705738067627, "rewards/rejected": -1.7537466287612915, "step": 4440 }, { "epoch": 0.58, "eval_logits/chosen": -2.3430752754211426, "eval_logits/rejected": -2.3584113121032715, "eval_logps/chosen": -461.8674011230469, "eval_logps/rejected": -469.6636047363281, "eval_loss": 0.6097197532653809, "eval_rewards/accuracies": 0.6669999957084656, "eval_rewards/chosen": -1.2911797761917114, "eval_rewards/margins": 0.3943558931350708, "eval_rewards/rejected": -1.6855357885360718, "eval_runtime": 196.9233, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 4440 }, { "epoch": 0.58, "learning_rate": 2.218467370801138e-06, "logits/chosen": -2.5464415550231934, "logits/rejected": -2.5220420360565186, "logps/chosen": -467.94561767578125, "logps/rejected": -458.48199462890625, "loss": 0.6427, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.3079677820205688, "rewards/margins": 0.29971104860305786, "rewards/rejected": -1.607678771018982, "step": 4450 }, { "epoch": 0.58, "eval_logits/chosen": -2.350649833679199, "eval_logits/rejected": -2.366107225418091, "eval_logps/chosen": -462.7431945800781, "eval_logps/rejected": -470.6502990722656, "eval_loss": 0.6094748973846436, "eval_rewards/accuracies": 0.6650000214576721, "eval_rewards/chosen": -1.2999377250671387, "eval_rewards/margins": 0.39546507596969604, "eval_rewards/rejected": -1.695402979850769, "eval_runtime": 196.8685, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.08, "step": 4450 }, { "epoch": 0.58, "learning_rate": 2.207120695187304e-06, "logits/chosen": -2.4268229007720947, "logits/rejected": -2.4031078815460205, "logps/chosen": -478.80499267578125, "logps/rejected": -481.1709899902344, "loss": 0.5438, "rewards/accuracies": 0.75, "rewards/chosen": -1.222773790359497, "rewards/margins": 0.5444897413253784, "rewards/rejected": -1.767263650894165, "step": 4460 }, { "epoch": 0.58, "eval_logits/chosen": -2.359865188598633, "eval_logits/rejected": -2.3748998641967773, "eval_logps/chosen": -465.46929931640625, "eval_logps/rejected": -473.4424743652344, "eval_loss": 0.6078117489814758, "eval_rewards/accuracies": 0.6660000085830688, "eval_rewards/chosen": -1.3271992206573486, "eval_rewards/margins": 0.39612552523612976, "eval_rewards/rejected": -1.7233246564865112, "eval_runtime": 197.046, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 4460 }, { "epoch": 0.58, "learning_rate": 2.195780133439794e-06, "logits/chosen": -2.5647144317626953, "logits/rejected": -2.566028118133545, "logps/chosen": -478.5218200683594, "logps/rejected": -513.4711303710938, "loss": 0.6207, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.3200973272323608, "rewards/margins": 0.40641552209854126, "rewards/rejected": -1.7265126705169678, "step": 4470 }, { "epoch": 0.58, "eval_logits/chosen": -2.3541791439056396, "eval_logits/rejected": -2.3686718940734863, "eval_logps/chosen": -473.7710266113281, "eval_logps/rejected": -482.60931396484375, "eval_loss": 0.608197033405304, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -1.410216212272644, "eval_rewards/margins": 0.40477627515792847, "eval_rewards/rejected": -1.8149923086166382, "eval_runtime": 196.9358, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 4470 }, { "epoch": 0.59, "learning_rate": 2.1844459222932535e-06, "logits/chosen": -2.5640816688537598, "logits/rejected": -2.5077226161956787, "logps/chosen": -475.5809631347656, "logps/rejected": -474.53924560546875, "loss": 0.5768, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.2696446180343628, "rewards/margins": 0.47660988569259644, "rewards/rejected": -1.7462546825408936, "step": 4480 }, { "epoch": 0.59, "eval_logits/chosen": -2.3506596088409424, "eval_logits/rejected": -2.364856004714966, "eval_logps/chosen": -477.4462890625, "eval_logps/rejected": -486.5351867675781, "eval_loss": 0.6080268621444702, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -1.4469685554504395, "eval_rewards/margins": 0.40728288888931274, "eval_rewards/rejected": -1.8542513847351074, "eval_runtime": 197.2231, "eval_samples_per_second": 10.141, "eval_steps_per_second": 5.07, "step": 4480 }, { "epoch": 0.59, "learning_rate": 2.17311829834976e-06, "logits/chosen": -2.5868237018585205, "logits/rejected": -2.5791220664978027, "logps/chosen": -462.0890197753906, "logps/rejected": -485.8500061035156, "loss": 0.583, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2890268564224243, "rewards/margins": 0.41418081521987915, "rewards/rejected": -1.7032077312469482, "step": 4490 }, { "epoch": 0.59, "eval_logits/chosen": -2.3469271659851074, "eval_logits/rejected": -2.36118221282959, "eval_logps/chosen": -479.05010986328125, "eval_logps/rejected": -488.24432373046875, "eval_loss": 0.6082322597503662, "eval_rewards/accuracies": 0.6679999828338623, "eval_rewards/chosen": -1.463006615638733, "eval_rewards/margins": 0.40833622217178345, "eval_rewards/rejected": -1.8713427782058716, "eval_runtime": 197.0382, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 4490 }, { "epoch": 0.59, "learning_rate": 2.1617974980738814e-06, "logits/chosen": -2.572697162628174, "logits/rejected": -2.563896656036377, "logps/chosen": -455.87713623046875, "logps/rejected": -458.12823486328125, "loss": 0.531, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.3974636793136597, "rewards/margins": 0.5185772180557251, "rewards/rejected": -1.9160410165786743, "step": 4500 }, { "epoch": 0.59, "eval_logits/chosen": -2.3504679203033447, "eval_logits/rejected": -2.3647711277008057, "eval_logps/chosen": -475.88604736328125, "eval_logps/rejected": -484.9482116699219, "eval_loss": 0.6084606647491455, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -1.4313663244247437, "eval_rewards/margins": 0.4070153832435608, "eval_rewards/rejected": -1.8383818864822388, "eval_runtime": 197.0121, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 4500 }, { "epoch": 0.59, "learning_rate": 2.150483757787744e-06, "logits/chosen": -2.575751781463623, "logits/rejected": -2.5314788818359375, "logps/chosen": -459.76483154296875, "logps/rejected": -441.937255859375, "loss": 0.5774, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4474533796310425, "rewards/margins": 0.47381964325904846, "rewards/rejected": -1.9212729930877686, "step": 4510 }, { "epoch": 0.59, "eval_logits/chosen": -2.350123643875122, "eval_logits/rejected": -2.364333391189575, "eval_logps/chosen": -473.5544738769531, "eval_logps/rejected": -482.2587890625, "eval_loss": 0.6079715490341187, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -1.4080506563186646, "eval_rewards/margins": 0.40343719720840454, "eval_rewards/rejected": -1.8114880323410034, "eval_runtime": 197.1065, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.073, "step": 4510 }, { "epoch": 0.59, "learning_rate": 2.139177313666093e-06, "logits/chosen": -2.509402275085449, "logits/rejected": -2.524897336959839, "logps/chosen": -487.89910888671875, "logps/rejected": -468.90643310546875, "loss": 0.5757, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.24689519405365, "rewards/margins": 0.44425448775291443, "rewards/rejected": -1.6911497116088867, "step": 4520 }, { "epoch": 0.59, "eval_logits/chosen": -2.347217321395874, "eval_logits/rejected": -2.3612282276153564, "eval_logps/chosen": -472.8621826171875, "eval_logps/rejected": -481.3768615722656, "eval_loss": 0.6074300408363342, "eval_rewards/accuracies": 0.6644999980926514, "eval_rewards/chosen": -1.4011281728744507, "eval_rewards/margins": 0.40154018998146057, "eval_rewards/rejected": -1.8026682138442993, "eval_runtime": 197.0079, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 4520 }, { "epoch": 0.59, "learning_rate": 2.1278784017313688e-06, "logits/chosen": -2.5669217109680176, "logits/rejected": -2.5706307888031006, "logps/chosen": -495.63836669921875, "logps/rejected": -530.7364501953125, "loss": 0.6138, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.3805519342422485, "rewards/margins": 0.36727243661880493, "rewards/rejected": -1.7478240728378296, "step": 4530 }, { "epoch": 0.59, "eval_logits/chosen": -2.3434321880340576, "eval_logits/rejected": -2.3577535152435303, "eval_logps/chosen": -471.2819519042969, "eval_logps/rejected": -479.9246826171875, "eval_loss": 0.6079375147819519, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -1.3853251934051514, "eval_rewards/margins": 0.40282142162323, "eval_rewards/rejected": -1.7881464958190918, "eval_runtime": 197.0815, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 4530 }, { "epoch": 0.59, "learning_rate": 2.116587257848776e-06, "logits/chosen": -2.5853219032287598, "logits/rejected": -2.5926265716552734, "logps/chosen": -447.34991455078125, "logps/rejected": -500.7210998535156, "loss": 0.6412, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4180644750595093, "rewards/margins": 0.32260221242904663, "rewards/rejected": -1.7406667470932007, "step": 4540 }, { "epoch": 0.59, "eval_logits/chosen": -2.3363149166107178, "eval_logits/rejected": -2.3509626388549805, "eval_logps/chosen": -471.30853271484375, "eval_logps/rejected": -480.26007080078125, "eval_loss": 0.6089949011802673, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -1.3855911493301392, "eval_rewards/margins": 0.40590932965278625, "eval_rewards/rejected": -1.7915005683898926, "eval_runtime": 197.0543, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 4540 }, { "epoch": 0.6, "learning_rate": 2.105304117721361e-06, "logits/chosen": -2.397624969482422, "logits/rejected": -2.4318509101867676, "logps/chosen": -404.4676208496094, "logps/rejected": -399.29339599609375, "loss": 0.6387, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.3216915130615234, "rewards/margins": 0.3007916212081909, "rewards/rejected": -1.6224830150604248, "step": 4550 }, { "epoch": 0.6, "eval_logits/chosen": -2.3398046493530273, "eval_logits/rejected": -2.354630470275879, "eval_logps/chosen": -469.4595031738281, "eval_logps/rejected": -478.1709899902344, "eval_loss": 0.6085323095321655, "eval_rewards/accuracies": 0.6700000166893005, "eval_rewards/chosen": -1.3671008348464966, "eval_rewards/margins": 0.4035090506076813, "eval_rewards/rejected": -1.770609736442566, "eval_runtime": 197.043, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 4550 }, { "epoch": 0.6, "learning_rate": 2.0940292168850913e-06, "logits/chosen": -2.455711841583252, "logits/rejected": -2.4487950801849365, "logps/chosen": -457.455810546875, "logps/rejected": -445.8837890625, "loss": 0.6527, "rewards/accuracies": 0.625, "rewards/chosen": -1.3930675983428955, "rewards/margins": 0.2900000810623169, "rewards/rejected": -1.6830676794052124, "step": 4560 }, { "epoch": 0.6, "eval_logits/chosen": -2.3467464447021484, "eval_logits/rejected": -2.3617465496063232, "eval_logps/chosen": -465.6805419921875, "eval_logps/rejected": -473.7642517089844, "eval_loss": 0.6076022386550903, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -1.3293112516403198, "eval_rewards/margins": 0.39723050594329834, "eval_rewards/rejected": -1.7265417575836182, "eval_runtime": 196.898, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 4560 }, { "epoch": 0.6, "learning_rate": 2.082762790703939e-06, "logits/chosen": -2.5249645709991455, "logits/rejected": -2.4668526649475098, "logps/chosen": -469.11578369140625, "logps/rejected": -475.1290588378906, "loss": 0.6187, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.3228938579559326, "rewards/margins": 0.34750640392303467, "rewards/rejected": -1.6704002618789673, "step": 4570 }, { "epoch": 0.6, "eval_logits/chosen": -2.355630874633789, "eval_logits/rejected": -2.370851755142212, "eval_logps/chosen": -459.7823181152344, "eval_logps/rejected": -466.9198303222656, "eval_loss": 0.6069644093513489, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -1.2703286409378052, "eval_rewards/margins": 0.38776928186416626, "eval_rewards/rejected": -1.6580978631973267, "eval_runtime": 197.2739, "eval_samples_per_second": 10.138, "eval_steps_per_second": 5.069, "step": 4570 }, { "epoch": 0.6, "learning_rate": 2.0715050743649674e-06, "logits/chosen": -2.588480234146118, "logits/rejected": -2.560148239135742, "logps/chosen": -409.1583251953125, "logps/rejected": -486.67620849609375, "loss": 0.5671, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1350984573364258, "rewards/margins": 0.4589425027370453, "rewards/rejected": -1.5940409898757935, "step": 4580 }, { "epoch": 0.6, "eval_logits/chosen": -2.3605380058288574, "eval_logits/rejected": -2.375964403152466, "eval_logps/chosen": -456.28619384765625, "eval_logps/rejected": -463.0014953613281, "eval_loss": 0.6068898439407349, "eval_rewards/accuracies": 0.6664999723434448, "eval_rewards/chosen": -1.2353678941726685, "eval_rewards/margins": 0.38354694843292236, "eval_rewards/rejected": -1.6189148426055908, "eval_runtime": 197.0899, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 4580 }, { "epoch": 0.6, "learning_rate": 2.060256302873421e-06, "logits/chosen": -2.578284502029419, "logits/rejected": -2.5939929485321045, "logps/chosen": -418.2554626464844, "logps/rejected": -480.61383056640625, "loss": 0.5615, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1238166093826294, "rewards/margins": 0.49349433183670044, "rewards/rejected": -1.617310881614685, "step": 4590 }, { "epoch": 0.6, "eval_logits/chosen": -2.36183762550354, "eval_logits/rejected": -2.377291440963745, "eval_logps/chosen": -455.7953186035156, "eval_logps/rejected": -462.3291931152344, "eval_loss": 0.6073416471481323, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -1.2304589748382568, "eval_rewards/margins": 0.38173264265060425, "eval_rewards/rejected": -1.6121916770935059, "eval_runtime": 197.0365, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 4590 }, { "epoch": 0.6, "learning_rate": 2.049016711047822e-06, "logits/chosen": -2.6140739917755127, "logits/rejected": -2.5730433464050293, "logps/chosen": -448.59765625, "logps/rejected": -468.0848693847656, "loss": 0.5716, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.2782400846481323, "rewards/margins": 0.44344210624694824, "rewards/rejected": -1.7216823101043701, "step": 4600 }, { "epoch": 0.6, "eval_logits/chosen": -2.358152151107788, "eval_logits/rejected": -2.373021125793457, "eval_logps/chosen": -460.1275634765625, "eval_logps/rejected": -467.31146240234375, "eval_loss": 0.6074530482292175, "eval_rewards/accuracies": 0.6644999980926514, "eval_rewards/chosen": -1.2737818956375122, "eval_rewards/margins": 0.3882325291633606, "eval_rewards/rejected": -1.6620142459869385, "eval_runtime": 197.1299, "eval_samples_per_second": 10.146, "eval_steps_per_second": 5.073, "step": 4600 }, { "epoch": 0.6, "learning_rate": 2.037786533515064e-06, "logits/chosen": -2.63139009475708, "logits/rejected": -2.6090714931488037, "logps/chosen": -522.1685791015625, "logps/rejected": -497.5794982910156, "loss": 0.6994, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -1.4936577081680298, "rewards/margins": 0.17892040312290192, "rewards/rejected": -1.67257821559906, "step": 4610 }, { "epoch": 0.6, "eval_logits/chosen": -2.3628578186035156, "eval_logits/rejected": -2.3777432441711426, "eval_logps/chosen": -457.7207946777344, "eval_logps/rejected": -464.57574462890625, "eval_loss": 0.6071527004241943, "eval_rewards/accuracies": 0.6664999723434448, "eval_rewards/chosen": -1.2497135400772095, "eval_rewards/margins": 0.3849438726902008, "eval_rewards/rejected": -1.6346575021743774, "eval_runtime": 197.0828, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 4610 }, { "epoch": 0.6, "learning_rate": 2.02656600470552e-06, "logits/chosen": -2.5862843990325928, "logits/rejected": -2.595778703689575, "logps/chosen": -451.0542907714844, "logps/rejected": -471.8365173339844, "loss": 0.5692, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.250226616859436, "rewards/margins": 0.48750025033950806, "rewards/rejected": -1.7377268075942993, "step": 4620 }, { "epoch": 0.6, "eval_logits/chosen": -2.3665430545806885, "eval_logits/rejected": -2.381023406982422, "eval_logps/chosen": -457.5531921386719, "eval_logps/rejected": -464.34783935546875, "eval_loss": 0.6064249277114868, "eval_rewards/accuracies": 0.6675000190734863, "eval_rewards/chosen": -1.2480376958847046, "eval_rewards/margins": 0.3843400478363037, "eval_rewards/rejected": -1.6323778629302979, "eval_runtime": 196.9761, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 4620 }, { "epoch": 0.61, "learning_rate": 2.015355358848144e-06, "logits/chosen": -2.4676127433776855, "logits/rejected": -2.5058672428131104, "logps/chosen": -402.6142883300781, "logps/rejected": -451.1036682128906, "loss": 0.6417, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.2782353162765503, "rewards/margins": 0.3019106388092041, "rewards/rejected": -1.580146074295044, "step": 4630 }, { "epoch": 0.61, "eval_logits/chosen": -2.363272190093994, "eval_logits/rejected": -2.3775339126586914, "eval_logps/chosen": -459.9270935058594, "eval_logps/rejected": -467.0786437988281, "eval_loss": 0.6064499616622925, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -1.2717769145965576, "eval_rewards/margins": 0.3879096508026123, "eval_rewards/rejected": -1.6596864461898804, "eval_runtime": 197.3901, "eval_samples_per_second": 10.132, "eval_steps_per_second": 5.066, "step": 4630 }, { "epoch": 0.61, "learning_rate": 2.004154829965582e-06, "logits/chosen": -2.5863049030303955, "logits/rejected": -2.5930287837982178, "logps/chosen": -465.68524169921875, "logps/rejected": -476.76873779296875, "loss": 0.5776, "rewards/accuracies": 0.6875, "rewards/chosen": -1.19584321975708, "rewards/margins": 0.40354451537132263, "rewards/rejected": -1.599387764930725, "step": 4640 }, { "epoch": 0.61, "eval_logits/chosen": -2.360830783843994, "eval_logits/rejected": -2.3751513957977295, "eval_logps/chosen": -460.028076171875, "eval_logps/rejected": -467.1726989746094, "eval_loss": 0.6065632104873657, "eval_rewards/accuracies": 0.6679999828338623, "eval_rewards/chosen": -1.272786259651184, "eval_rewards/margins": 0.3878403902053833, "eval_rewards/rejected": -1.6606266498565674, "eval_runtime": 197.0543, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 4640 }, { "epoch": 0.61, "learning_rate": 1.99296465186929e-06, "logits/chosen": -2.593928098678589, "logits/rejected": -2.556190013885498, "logps/chosen": -455.4571228027344, "logps/rejected": -415.518798828125, "loss": 0.5816, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.0729163885116577, "rewards/margins": 0.3699313700199127, "rewards/rejected": -1.442847728729248, "step": 4650 }, { "epoch": 0.61, "eval_logits/chosen": -2.365307092666626, "eval_logits/rejected": -2.380260944366455, "eval_logps/chosen": -456.14019775390625, "eval_logps/rejected": -462.9325256347656, "eval_loss": 0.6066238880157471, "eval_rewards/accuracies": 0.671500027179718, "eval_rewards/chosen": -1.233907699584961, "eval_rewards/margins": 0.3843171000480652, "eval_rewards/rejected": -1.618224859237671, "eval_runtime": 197.2054, "eval_samples_per_second": 10.142, "eval_steps_per_second": 5.071, "step": 4650 }, { "epoch": 0.61, "learning_rate": 1.9817850581546488e-06, "logits/chosen": -2.5619285106658936, "logits/rejected": -2.5544750690460205, "logps/chosen": -470.73931884765625, "logps/rejected": -511.1991271972656, "loss": 0.6182, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.1987391710281372, "rewards/margins": 0.35175901651382446, "rewards/rejected": -1.5504982471466064, "step": 4660 }, { "epoch": 0.61, "eval_logits/chosen": -2.366844654083252, "eval_logits/rejected": -2.381772518157959, "eval_logps/chosen": -456.302490234375, "eval_logps/rejected": -463.168701171875, "eval_loss": 0.6066789031028748, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.235530972480774, "eval_rewards/margins": 0.3850558400154114, "eval_rewards/rejected": -1.6205867528915405, "eval_runtime": 197.2261, "eval_samples_per_second": 10.141, "eval_steps_per_second": 5.07, "step": 4660 }, { "epoch": 0.61, "learning_rate": 1.970616282196091e-06, "logits/chosen": -2.5769898891448975, "logits/rejected": -2.601787567138672, "logps/chosen": -437.11962890625, "logps/rejected": -451.64862060546875, "loss": 0.6184, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.2210407257080078, "rewards/margins": 0.3390752375125885, "rewards/rejected": -1.5601160526275635, "step": 4670 }, { "epoch": 0.61, "eval_logits/chosen": -2.368856906890869, "eval_logits/rejected": -2.383789539337158, "eval_logps/chosen": -454.7909851074219, "eval_logps/rejected": -461.31109619140625, "eval_loss": 0.6065412759780884, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.2204158306121826, "eval_rewards/margins": 0.3815949261188507, "eval_rewards/rejected": -1.602010726928711, "eval_runtime": 197.1533, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 4670 }, { "epoch": 0.61, "learning_rate": 1.959458557142228e-06, "logits/chosen": -2.617663860321045, "logits/rejected": -2.5870256423950195, "logps/chosen": -432.1153869628906, "logps/rejected": -464.52020263671875, "loss": 0.7167, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.2370940446853638, "rewards/margins": 0.1650908887386322, "rewards/rejected": -1.4021847248077393, "step": 4680 }, { "epoch": 0.61, "eval_logits/chosen": -2.3691041469573975, "eval_logits/rejected": -2.3844714164733887, "eval_logps/chosen": -451.03857421875, "eval_logps/rejected": -456.8002624511719, "eval_loss": 0.6063486337661743, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -1.1828911304473877, "eval_rewards/margins": 0.37401124835014343, "eval_rewards/rejected": -1.556902289390564, "eval_runtime": 196.918, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.078, "step": 4680 }, { "epoch": 0.61, "learning_rate": 1.948312115910982e-06, "logits/chosen": -2.5269622802734375, "logits/rejected": -2.5282649993896484, "logps/chosen": -453.5530700683594, "logps/rejected": -455.9603576660156, "loss": 0.6275, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -1.1158868074417114, "rewards/margins": 0.47353777289390564, "rewards/rejected": -1.58942449092865, "step": 4690 }, { "epoch": 0.61, "eval_logits/chosen": -2.37105393409729, "eval_logits/rejected": -2.386112928390503, "eval_logps/chosen": -449.9285583496094, "eval_logps/rejected": -455.6019287109375, "eval_loss": 0.6053135395050049, "eval_rewards/accuracies": 0.6765000224113464, "eval_rewards/chosen": -1.171791672706604, "eval_rewards/margins": 0.3731272518634796, "eval_rewards/rejected": -1.5449188947677612, "eval_runtime": 197.1224, "eval_samples_per_second": 10.146, "eval_steps_per_second": 5.073, "step": 4690 }, { "epoch": 0.62, "learning_rate": 1.937177191184729e-06, "logits/chosen": -2.5588791370391846, "logits/rejected": -2.5623703002929688, "logps/chosen": -411.646484375, "logps/rejected": -428.03515625, "loss": 0.6771, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.1427921056747437, "rewards/margins": 0.18787182867527008, "rewards/rejected": -1.3306639194488525, "step": 4700 }, { "epoch": 0.62, "eval_logits/chosen": -2.3747167587280273, "eval_logits/rejected": -2.3900814056396484, "eval_logps/chosen": -445.68310546875, "eval_logps/rejected": -450.5074462890625, "eval_loss": 0.6052196621894836, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -1.129336953163147, "eval_rewards/margins": 0.3646370768547058, "eval_rewards/rejected": -1.493973970413208, "eval_runtime": 196.884, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 4700 }, { "epoch": 0.62, "learning_rate": 1.9260540154054317e-06, "logits/chosen": -2.599818229675293, "logits/rejected": -2.5831518173217773, "logps/chosen": -407.63092041015625, "logps/rejected": -445.3501892089844, "loss": 0.5374, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0230557918548584, "rewards/margins": 0.5404427647590637, "rewards/rejected": -1.5634984970092773, "step": 4710 }, { "epoch": 0.62, "eval_logits/chosen": -2.3762285709381104, "eval_logits/rejected": -2.3915481567382812, "eval_logps/chosen": -446.7705993652344, "eval_logps/rejected": -451.7310791015625, "eval_loss": 0.6049104928970337, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.140211820602417, "eval_rewards/margins": 0.365998774766922, "eval_rewards/rejected": -1.5062106847763062, "eval_runtime": 196.9674, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 4710 }, { "epoch": 0.62, "learning_rate": 1.9149428207697983e-06, "logits/chosen": -2.614574670791626, "logits/rejected": -2.602724552154541, "logps/chosen": -444.8438415527344, "logps/rejected": -457.318603515625, "loss": 0.6618, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.1555159091949463, "rewards/margins": 0.2247290313243866, "rewards/rejected": -1.3802449703216553, "step": 4720 }, { "epoch": 0.62, "eval_logits/chosen": -2.376986026763916, "eval_logits/rejected": -2.3926074504852295, "eval_logps/chosen": -446.311767578125, "eval_logps/rejected": -451.22210693359375, "eval_loss": 0.6049630045890808, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.135623812675476, "eval_rewards/margins": 0.3654967248439789, "eval_rewards/rejected": -1.5011205673217773, "eval_runtime": 197.1008, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.074, "step": 4720 }, { "epoch": 0.62, "learning_rate": 1.9038438392244262e-06, "logits/chosen": -2.5899956226348877, "logits/rejected": -2.623196840286255, "logps/chosen": -448.413330078125, "logps/rejected": -460.62701416015625, "loss": 0.5748, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.0493090152740479, "rewards/margins": 0.40030479431152344, "rewards/rejected": -1.4496138095855713, "step": 4730 }, { "epoch": 0.62, "eval_logits/chosen": -2.367912769317627, "eval_logits/rejected": -2.3833110332489014, "eval_logps/chosen": -450.4692687988281, "eval_logps/rejected": -455.7525634765625, "eval_loss": 0.6047419309616089, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.177198886871338, "eval_rewards/margins": 0.36922687292099, "eval_rewards/rejected": -1.5464258193969727, "eval_runtime": 196.8485, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 4730 }, { "epoch": 0.62, "learning_rate": 1.8927573024609666e-06, "logits/chosen": -2.5434505939483643, "logits/rejected": -2.5118329524993896, "logps/chosen": -391.2030334472656, "logps/rejected": -415.4365234375, "loss": 0.5787, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.1048915386199951, "rewards/margins": 0.4196711480617523, "rewards/rejected": -1.5245627164840698, "step": 4740 }, { "epoch": 0.62, "eval_logits/chosen": -2.356715202331543, "eval_logits/rejected": -2.3718693256378174, "eval_logps/chosen": -455.8304138183594, "eval_logps/rejected": -461.651611328125, "eval_loss": 0.6052024960517883, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.2308100461959839, "eval_rewards/margins": 0.37460586428642273, "eval_rewards/rejected": -1.605415940284729, "eval_runtime": 196.9467, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.078, "step": 4740 }, { "epoch": 0.62, "learning_rate": 1.8816834419112845e-06, "logits/chosen": -2.5052685737609863, "logits/rejected": -2.5242958068847656, "logps/chosen": -430.42303466796875, "logps/rejected": -435.1065368652344, "loss": 0.5646, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.158060908317566, "rewards/margins": 0.5619903802871704, "rewards/rejected": -1.7200514078140259, "step": 4750 }, { "epoch": 0.62, "eval_logits/chosen": -2.349745512008667, "eval_logits/rejected": -2.364739179611206, "eval_logps/chosen": -459.4644470214844, "eval_logps/rejected": -465.70050048828125, "eval_loss": 0.6054902076721191, "eval_rewards/accuracies": 0.671500027179718, "eval_rewards/chosen": -1.2671502828598022, "eval_rewards/margins": 0.37875503301620483, "eval_rewards/rejected": -1.6459051370620728, "eval_runtime": 197.0325, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 4750 }, { "epoch": 0.62, "learning_rate": 1.8706224887426283e-06, "logits/chosen": -2.541607141494751, "logits/rejected": -2.5702714920043945, "logps/chosen": -462.774658203125, "logps/rejected": -490.91314697265625, "loss": 0.6502, "rewards/accuracies": 0.625, "rewards/chosen": -1.2841370105743408, "rewards/margins": 0.2700539827346802, "rewards/rejected": -1.554190993309021, "step": 4760 }, { "epoch": 0.62, "eval_logits/chosen": -2.3464877605438232, "eval_logits/rejected": -2.361438512802124, "eval_logps/chosen": -459.6805114746094, "eval_logps/rejected": -465.8286437988281, "eval_loss": 0.60645592212677, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -1.2693109512329102, "eval_rewards/margins": 0.37787550687789917, "eval_rewards/rejected": -1.647186279296875, "eval_runtime": 196.8861, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 4760 }, { "epoch": 0.62, "learning_rate": 1.8595746738528045e-06, "logits/chosen": -2.5531961917877197, "logits/rejected": -2.559727191925049, "logps/chosen": -429.28912353515625, "logps/rejected": -492.12554931640625, "loss": 0.5963, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.1560680866241455, "rewards/margins": 0.4180780351161957, "rewards/rejected": -1.5741461515426636, "step": 4770 }, { "epoch": 0.62, "eval_logits/chosen": -2.3467257022857666, "eval_logits/rejected": -2.3617849349975586, "eval_logps/chosen": -459.44964599609375, "eval_logps/rejected": -465.6266174316406, "eval_loss": 0.6069409847259521, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -1.2670023441314697, "eval_rewards/margins": 0.3781636953353882, "eval_rewards/rejected": -1.6451661586761475, "eval_runtime": 196.8162, "eval_samples_per_second": 10.162, "eval_steps_per_second": 5.081, "step": 4770 }, { "epoch": 0.63, "learning_rate": 1.8485402278653584e-06, "logits/chosen": -2.547219753265381, "logits/rejected": -2.548625946044922, "logps/chosen": -431.35052490234375, "logps/rejected": -445.52410888671875, "loss": 0.5687, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.306722640991211, "rewards/margins": 0.44679850339889526, "rewards/rejected": -1.7535209655761719, "step": 4780 }, { "epoch": 0.63, "eval_logits/chosen": -2.3422722816467285, "eval_logits/rejected": -2.357463836669922, "eval_logps/chosen": -461.4661560058594, "eval_logps/rejected": -467.7752990722656, "eval_loss": 0.608340322971344, "eval_rewards/accuracies": 0.6660000085830688, "eval_rewards/chosen": -1.2871674299240112, "eval_rewards/margins": 0.37948548793792725, "eval_rewards/rejected": -1.666652798652649, "eval_runtime": 196.9486, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 4780 }, { "epoch": 0.63, "learning_rate": 1.8375193811247577e-06, "logits/chosen": -2.454245090484619, "logits/rejected": -2.420996904373169, "logps/chosen": -437.4507751464844, "logps/rejected": -432.07818603515625, "loss": 0.644, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.3491953611373901, "rewards/margins": 0.25019291043281555, "rewards/rejected": -1.5993882417678833, "step": 4790 }, { "epoch": 0.63, "eval_logits/chosen": -2.3409667015075684, "eval_logits/rejected": -2.3561835289001465, "eval_logps/chosen": -461.2978820800781, "eval_logps/rejected": -467.45684814453125, "eval_loss": 0.608421266078949, "eval_rewards/accuracies": 0.6669999957084656, "eval_rewards/chosen": -1.2854849100112915, "eval_rewards/margins": 0.37798330187797546, "eval_rewards/rejected": -1.6634680032730103, "eval_runtime": 197.0826, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 4790 }, { "epoch": 0.63, "learning_rate": 1.826512363691586e-06, "logits/chosen": -2.5934157371520996, "logits/rejected": -2.5818896293640137, "logps/chosen": -464.135986328125, "logps/rejected": -465.83282470703125, "loss": 0.6323, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.2174699306488037, "rewards/margins": 0.3847096264362335, "rewards/rejected": -1.6021795272827148, "step": 4800 }, { "epoch": 0.63, "eval_logits/chosen": -2.336951732635498, "eval_logits/rejected": -2.351977586746216, "eval_logps/chosen": -461.8417053222656, "eval_logps/rejected": -467.85968017578125, "eval_loss": 0.608333170413971, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -1.2909232378005981, "eval_rewards/margins": 0.37657347321510315, "eval_rewards/rejected": -1.667496681213379, "eval_runtime": 197.0144, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 4800 }, { "epoch": 0.63, "learning_rate": 1.8155194053377391e-06, "logits/chosen": -2.559887647628784, "logits/rejected": -2.5054869651794434, "logps/chosen": -448.12042236328125, "logps/rejected": -444.4010314941406, "loss": 0.575, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.2008377313613892, "rewards/margins": 0.487928569316864, "rewards/rejected": -1.6887662410736084, "step": 4810 }, { "epoch": 0.63, "eval_logits/chosen": -2.336354970932007, "eval_logits/rejected": -2.3510005474090576, "eval_logps/chosen": -464.4339599609375, "eval_logps/rejected": -471.0044860839844, "eval_loss": 0.607102632522583, "eval_rewards/accuracies": 0.6675000190734863, "eval_rewards/chosen": -1.3168458938598633, "eval_rewards/margins": 0.3820990025997162, "eval_rewards/rejected": -1.6989449262619019, "eval_runtime": 197.1694, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 4810 }, { "epoch": 0.63, "learning_rate": 1.80454073554163e-06, "logits/chosen": -2.514131784439087, "logits/rejected": -2.4912569522857666, "logps/chosen": -406.12646484375, "logps/rejected": -405.43072509765625, "loss": 0.645, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.2258880138397217, "rewards/margins": 0.30174189805984497, "rewards/rejected": -1.527630090713501, "step": 4820 }, { "epoch": 0.63, "eval_logits/chosen": -2.3418140411376953, "eval_logits/rejected": -2.356260299682617, "eval_logps/chosen": -465.5768127441406, "eval_logps/rejected": -472.4665222167969, "eval_loss": 0.6061503291130066, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -1.3282736539840698, "eval_rewards/margins": 0.3852910101413727, "eval_rewards/rejected": -1.7135647535324097, "eval_runtime": 197.061, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 4820 }, { "epoch": 0.63, "learning_rate": 1.7935765834833966e-06, "logits/chosen": -2.5507476329803467, "logits/rejected": -2.5161209106445312, "logps/chosen": -430.421142578125, "logps/rejected": -504.8538513183594, "loss": 0.5161, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.164041519165039, "rewards/margins": 0.6415061354637146, "rewards/rejected": -1.8055477142333984, "step": 4830 }, { "epoch": 0.63, "eval_logits/chosen": -2.3437082767486572, "eval_logits/rejected": -2.357666015625, "eval_logps/chosen": -467.56988525390625, "eval_logps/rejected": -474.6637268066406, "eval_loss": 0.6059348583221436, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -1.3482048511505127, "eval_rewards/margins": 0.38733214139938354, "eval_rewards/rejected": -1.735536813735962, "eval_runtime": 196.9656, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 4830 }, { "epoch": 0.63, "learning_rate": 1.7826271780401182e-06, "logits/chosen": -2.353175640106201, "logits/rejected": -2.387111186981201, "logps/chosen": -439.74078369140625, "logps/rejected": -449.5003967285156, "loss": 0.5804, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3335378170013428, "rewards/margins": 0.39224615693092346, "rewards/rejected": -1.7257843017578125, "step": 4840 }, { "epoch": 0.63, "eval_logits/chosen": -2.3427133560180664, "eval_logits/rejected": -2.3566486835479736, "eval_logps/chosen": -469.28875732421875, "eval_logps/rejected": -476.5825500488281, "eval_loss": 0.6061907410621643, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -1.3653934001922607, "eval_rewards/margins": 0.3893316686153412, "eval_rewards/rejected": -1.7547252178192139, "eval_runtime": 197.0425, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 4840 }, { "epoch": 0.63, "learning_rate": 1.7716927477810389e-06, "logits/chosen": -2.543253183364868, "logits/rejected": -2.5667479038238525, "logps/chosen": -467.21527099609375, "logps/rejected": -513.29248046875, "loss": 0.571, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4500186443328857, "rewards/margins": 0.5679537057876587, "rewards/rejected": -2.017972230911255, "step": 4850 }, { "epoch": 0.63, "eval_logits/chosen": -2.3366286754608154, "eval_logits/rejected": -2.350470542907715, "eval_logps/chosen": -472.86968994140625, "eval_logps/rejected": -480.5473937988281, "eval_loss": 0.6069199442863464, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -1.401202917098999, "eval_rewards/margins": 0.39317089319229126, "eval_rewards/rejected": -1.7943737506866455, "eval_runtime": 197.3065, "eval_samples_per_second": 10.137, "eval_steps_per_second": 5.068, "step": 4850 }, { "epoch": 0.64, "learning_rate": 1.7607735209627953e-06, "logits/chosen": -2.544330596923828, "logits/rejected": -2.4630868434906006, "logps/chosen": -475.41070556640625, "logps/rejected": -475.90728759765625, "loss": 0.5553, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.4359493255615234, "rewards/margins": 0.5188180208206177, "rewards/rejected": -1.9547672271728516, "step": 4860 }, { "epoch": 0.64, "eval_logits/chosen": -2.330714702606201, "eval_logits/rejected": -2.344393491744995, "eval_logps/chosen": -475.8780517578125, "eval_logps/rejected": -483.9844055175781, "eval_loss": 0.6073537468910217, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -1.4312864542007446, "eval_rewards/margins": 0.39745715260505676, "eval_rewards/rejected": -1.8287436962127686, "eval_runtime": 197.1786, "eval_samples_per_second": 10.143, "eval_steps_per_second": 5.072, "step": 4860 }, { "epoch": 0.64, "learning_rate": 1.749869725524651e-06, "logits/chosen": -2.556461811065674, "logits/rejected": -2.519881010055542, "logps/chosen": -482.1178283691406, "logps/rejected": -488.8779296875, "loss": 0.5774, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4461584091186523, "rewards/margins": 0.4897529184818268, "rewards/rejected": -1.9359114170074463, "step": 4870 }, { "epoch": 0.64, "eval_logits/chosen": -2.3231258392333984, "eval_logits/rejected": -2.3361117839813232, "eval_logps/chosen": -482.7806396484375, "eval_logps/rejected": -491.5416259765625, "eval_loss": 0.6079848408699036, "eval_rewards/accuracies": 0.6700000166893005, "eval_rewards/chosen": -1.5003119707107544, "eval_rewards/margins": 0.4040038287639618, "eval_rewards/rejected": -1.9043160676956177, "eval_runtime": 197.076, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 4870 }, { "epoch": 0.64, "learning_rate": 1.7389815890837392e-06, "logits/chosen": -2.466991901397705, "logits/rejected": -2.4719462394714355, "logps/chosen": -476.388427734375, "logps/rejected": -549.646240234375, "loss": 0.5515, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4125124216079712, "rewards/margins": 0.5354470014572144, "rewards/rejected": -1.947959303855896, "step": 4880 }, { "epoch": 0.64, "eval_logits/chosen": -2.3077244758605957, "eval_logits/rejected": -2.3208236694335938, "eval_logps/chosen": -487.87640380859375, "eval_logps/rejected": -497.2055358886719, "eval_loss": 0.6097118258476257, "eval_rewards/accuracies": 0.6754999756813049, "eval_rewards/chosen": -1.5512698888778687, "eval_rewards/margins": 0.4096851646900177, "eval_rewards/rejected": -1.960955023765564, "eval_runtime": 197.1263, "eval_samples_per_second": 10.146, "eval_steps_per_second": 5.073, "step": 4880 }, { "epoch": 0.64, "learning_rate": 1.7281093389303105e-06, "logits/chosen": -2.5559370517730713, "logits/rejected": -2.5300230979919434, "logps/chosen": -454.37158203125, "logps/rejected": -464.384521484375, "loss": 0.6337, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.4563044309616089, "rewards/margins": 0.377260684967041, "rewards/rejected": -1.83356511592865, "step": 4890 }, { "epoch": 0.64, "eval_logits/chosen": -2.3070895671844482, "eval_logits/rejected": -2.320559501647949, "eval_logps/chosen": -485.8458251953125, "eval_logps/rejected": -494.9561767578125, "eval_loss": 0.60999995470047, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.5309646129608154, "eval_rewards/margins": 0.40749725699424744, "eval_rewards/rejected": -1.9384618997573853, "eval_runtime": 197.1688, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 4890 }, { "epoch": 0.64, "learning_rate": 1.7172532020229899e-06, "logits/chosen": -2.526170253753662, "logits/rejected": -2.5139780044555664, "logps/chosen": -498.65167236328125, "logps/rejected": -504.8377990722656, "loss": 0.5866, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.522796869277954, "rewards/margins": 0.4846018850803375, "rewards/rejected": -2.0073986053466797, "step": 4900 }, { "epoch": 0.64, "eval_logits/chosen": -2.3101584911346436, "eval_logits/rejected": -2.3237569332122803, "eval_logps/chosen": -482.6979675292969, "eval_logps/rejected": -491.50799560546875, "eval_loss": 0.6095430850982666, "eval_rewards/accuracies": 0.6769999861717224, "eval_rewards/chosen": -1.499485969543457, "eval_rewards/margins": 0.4044934809207916, "eval_rewards/rejected": -1.9039794206619263, "eval_runtime": 197.2086, "eval_samples_per_second": 10.142, "eval_steps_per_second": 5.071, "step": 4900 }, { "epoch": 0.64, "learning_rate": 1.7064134049840359e-06, "logits/chosen": -2.507721185684204, "logits/rejected": -2.546586275100708, "logps/chosen": -463.30078125, "logps/rejected": -505.97833251953125, "loss": 0.5647, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4573460817337036, "rewards/margins": 0.46546226739883423, "rewards/rejected": -1.922808289527893, "step": 4910 }, { "epoch": 0.64, "eval_logits/chosen": -2.3065459728240967, "eval_logits/rejected": -2.3202407360076904, "eval_logps/chosen": -483.61065673828125, "eval_logps/rejected": -492.63818359375, "eval_loss": 0.6099902391433716, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -1.5086122751235962, "eval_rewards/margins": 0.4066696763038635, "eval_rewards/rejected": -1.9152820110321045, "eval_runtime": 196.7095, "eval_samples_per_second": 10.167, "eval_steps_per_second": 5.084, "step": 4910 }, { "epoch": 0.64, "learning_rate": 1.6955901740946136e-06, "logits/chosen": -2.532555341720581, "logits/rejected": -2.5162951946258545, "logps/chosen": -534.9705200195312, "logps/rejected": -571.7120361328125, "loss": 0.5853, "rewards/accuracies": 0.6875, "rewards/chosen": -1.7092939615249634, "rewards/margins": 0.5048640370368958, "rewards/rejected": -2.214157819747925, "step": 4920 }, { "epoch": 0.64, "eval_logits/chosen": -2.2999629974365234, "eval_logits/rejected": -2.3135812282562256, "eval_logps/chosen": -484.7542724609375, "eval_logps/rejected": -493.9432373046875, "eval_loss": 0.6107072830200195, "eval_rewards/accuracies": 0.6710000038146973, "eval_rewards/chosen": -1.5200488567352295, "eval_rewards/margins": 0.40828338265419006, "eval_rewards/rejected": -1.9283322095870972, "eval_runtime": 196.8667, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.08, "step": 4920 }, { "epoch": 0.65, "learning_rate": 1.684783735290067e-06, "logits/chosen": -2.452775001525879, "logits/rejected": -2.436053514480591, "logps/chosen": -464.0335388183594, "logps/rejected": -503.10565185546875, "loss": 0.5357, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4800546169281006, "rewards/margins": 0.6100779175758362, "rewards/rejected": -2.090132713317871, "step": 4930 }, { "epoch": 0.65, "eval_logits/chosen": -2.291494607925415, "eval_logits/rejected": -2.304412364959717, "eval_logps/chosen": -488.9641418457031, "eval_logps/rejected": -498.66180419921875, "eval_loss": 0.6109405755996704, "eval_rewards/accuracies": 0.6710000038146973, "eval_rewards/chosen": -1.5621472597122192, "eval_rewards/margins": 0.413370817899704, "eval_rewards/rejected": -1.975517988204956, "eval_runtime": 197.0966, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.074, "step": 4930 }, { "epoch": 0.65, "learning_rate": 1.6739943141552079e-06, "logits/chosen": -2.4729270935058594, "logits/rejected": -2.4224693775177, "logps/chosen": -514.30078125, "logps/rejected": -504.29193115234375, "loss": 0.5771, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4810277223587036, "rewards/margins": 0.5367648005485535, "rewards/rejected": -2.017792224884033, "step": 4940 }, { "epoch": 0.65, "eval_logits/chosen": -2.2908596992492676, "eval_logits/rejected": -2.3033571243286133, "eval_logps/chosen": -489.99090576171875, "eval_logps/rejected": -499.9726867675781, "eval_loss": 0.6103520393371582, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -1.5724151134490967, "eval_rewards/margins": 0.41621133685112, "eval_rewards/rejected": -1.9886267185211182, "eval_runtime": 197.1115, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.073, "step": 4940 }, { "epoch": 0.65, "learning_rate": 1.663222135919601e-06, "logits/chosen": -2.5372846126556396, "logits/rejected": -2.495419979095459, "logps/chosen": -520.7520141601562, "logps/rejected": -525.1770629882812, "loss": 0.6244, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.5612698793411255, "rewards/margins": 0.35862964391708374, "rewards/rejected": -1.919899582862854, "step": 4950 }, { "epoch": 0.65, "eval_logits/chosen": -2.2972042560577393, "eval_logits/rejected": -2.3097643852233887, "eval_logps/chosen": -483.6584777832031, "eval_logps/rejected": -493.1936340332031, "eval_loss": 0.6088432669639587, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -1.509090542793274, "eval_rewards/margins": 0.41174548864364624, "eval_rewards/rejected": -1.9208359718322754, "eval_runtime": 196.7904, "eval_samples_per_second": 10.163, "eval_steps_per_second": 5.082, "step": 4950 }, { "epoch": 0.65, "learning_rate": 1.652467425452865e-06, "logits/chosen": -2.539245128631592, "logits/rejected": -2.5281739234924316, "logps/chosen": -452.2598571777344, "logps/rejected": -458.35791015625, "loss": 0.6303, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4337660074234009, "rewards/margins": 0.30899950861930847, "rewards/rejected": -1.7427654266357422, "step": 4960 }, { "epoch": 0.65, "eval_logits/chosen": -2.304708957672119, "eval_logits/rejected": -2.317460775375366, "eval_logps/chosen": -478.4639892578125, "eval_logps/rejected": -487.269775390625, "eval_loss": 0.6076183319091797, "eval_rewards/accuracies": 0.6675000190734863, "eval_rewards/chosen": -1.4571460485458374, "eval_rewards/margins": 0.40445175766944885, "eval_rewards/rejected": -1.8615976572036743, "eval_runtime": 197.0151, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 4960 }, { "epoch": 0.65, "learning_rate": 1.6417304072599787e-06, "logits/chosen": -2.5274784564971924, "logits/rejected": -2.4446208477020264, "logps/chosen": -478.2151794433594, "logps/rejected": -510.80938720703125, "loss": 0.6038, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.5238378047943115, "rewards/margins": 0.408879816532135, "rewards/rejected": -1.9327175617218018, "step": 4970 }, { "epoch": 0.65, "eval_logits/chosen": -2.3127100467681885, "eval_logits/rejected": -2.3254218101501465, "eval_logps/chosen": -474.1257629394531, "eval_logps/rejected": -482.4593200683594, "eval_loss": 0.6065331101417542, "eval_rewards/accuracies": 0.6664999723434448, "eval_rewards/chosen": -1.413763165473938, "eval_rewards/margins": 0.3997298777103424, "eval_rewards/rejected": -1.8134931325912476, "eval_runtime": 196.7927, "eval_samples_per_second": 10.163, "eval_steps_per_second": 5.081, "step": 4970 }, { "epoch": 0.65, "learning_rate": 1.6310113054765947e-06, "logits/chosen": -2.5427753925323486, "logits/rejected": -2.5154194831848145, "logps/chosen": -491.2950744628906, "logps/rejected": -490.4586486816406, "loss": 0.5813, "rewards/accuracies": 0.75, "rewards/chosen": -1.4039534330368042, "rewards/margins": 0.549685001373291, "rewards/rejected": -1.9536384344100952, "step": 4980 }, { "epoch": 0.65, "eval_logits/chosen": -2.317664623260498, "eval_logits/rejected": -2.3301045894622803, "eval_logps/chosen": -473.3541259765625, "eval_logps/rejected": -481.7451477050781, "eval_loss": 0.6059139370918274, "eval_rewards/accuracies": 0.6669999957084656, "eval_rewards/chosen": -1.406046748161316, "eval_rewards/margins": 0.4003046751022339, "eval_rewards/rejected": -1.8063515424728394, "eval_runtime": 196.6922, "eval_samples_per_second": 10.168, "eval_steps_per_second": 5.084, "step": 4980 }, { "epoch": 0.65, "learning_rate": 1.6203103438643591e-06, "logits/chosen": -2.5425033569335938, "logits/rejected": -2.545300245285034, "logps/chosen": -458.9690856933594, "logps/rejected": -481.59637451171875, "loss": 0.6386, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.4391909837722778, "rewards/margins": 0.31656602025032043, "rewards/rejected": -1.7557569742202759, "step": 4990 }, { "epoch": 0.65, "eval_logits/chosen": -2.317337989807129, "eval_logits/rejected": -2.3296010494232178, "eval_logps/chosen": -473.6238098144531, "eval_logps/rejected": -482.0346984863281, "eval_loss": 0.6056146025657654, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -1.408744215965271, "eval_rewards/margins": 0.4005022644996643, "eval_rewards/rejected": -1.809246301651001, "eval_runtime": 196.9842, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 4990 }, { "epoch": 0.65, "learning_rate": 1.6096277458062417e-06, "logits/chosen": -2.5096194744110107, "logits/rejected": -2.506507635116577, "logps/chosen": -388.854736328125, "logps/rejected": -456.4751892089844, "loss": 0.5541, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3161919116973877, "rewards/margins": 0.5099955797195435, "rewards/rejected": -1.8261874914169312, "step": 5000 }, { "epoch": 0.65, "eval_logits/chosen": -2.3156914710998535, "eval_logits/rejected": -2.3281033039093018, "eval_logps/chosen": -473.21319580078125, "eval_logps/rejected": -481.538330078125, "eval_loss": 0.606336772441864, "eval_rewards/accuracies": 0.6679999828338623, "eval_rewards/chosen": -1.4046378135681152, "eval_rewards/margins": 0.3996453285217285, "eval_rewards/rejected": -1.8042830228805542, "eval_runtime": 196.9579, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 5000 }, { "epoch": 0.66, "learning_rate": 1.5989637343018705e-06, "logits/chosen": -2.4774773120880127, "logits/rejected": -2.451045274734497, "logps/chosen": -432.1453552246094, "logps/rejected": -484.3370056152344, "loss": 0.5711, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.1870195865631104, "rewards/margins": 0.47284239530563354, "rewards/rejected": -1.6598621606826782, "step": 5010 }, { "epoch": 0.66, "eval_logits/chosen": -2.3094582557678223, "eval_logits/rejected": -2.3220887184143066, "eval_logps/chosen": -474.7253723144531, "eval_logps/rejected": -483.3790588378906, "eval_loss": 0.6073668003082275, "eval_rewards/accuracies": 0.6625000238418579, "eval_rewards/chosen": -1.4197593927383423, "eval_rewards/margins": 0.40293073654174805, "eval_rewards/rejected": -1.8226900100708008, "eval_runtime": 197.0656, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.074, "step": 5010 }, { "epoch": 0.66, "learning_rate": 1.5883185319628824e-06, "logits/chosen": -2.4050259590148926, "logits/rejected": -2.366429567337036, "logps/chosen": -499.8345642089844, "logps/rejected": -475.7578125, "loss": 0.581, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4453026056289673, "rewards/margins": 0.42148298025131226, "rewards/rejected": -1.8667854070663452, "step": 5020 }, { "epoch": 0.66, "eval_logits/chosen": -2.312451124191284, "eval_logits/rejected": -2.3250417709350586, "eval_logps/chosen": -475.1689147949219, "eval_logps/rejected": -483.9620666503906, "eval_loss": 0.6066410541534424, "eval_rewards/accuracies": 0.6669999957084656, "eval_rewards/chosen": -1.4241948127746582, "eval_rewards/margins": 0.4043256342411041, "eval_rewards/rejected": -1.8285205364227295, "eval_runtime": 196.8009, "eval_samples_per_second": 10.163, "eval_steps_per_second": 5.081, "step": 5020 }, { "epoch": 0.66, "learning_rate": 1.5776923610082695e-06, "logits/chosen": -2.58607816696167, "logits/rejected": -2.5599188804626465, "logps/chosen": -451.46417236328125, "logps/rejected": -478.35955810546875, "loss": 0.5567, "rewards/accuracies": 0.6875, "rewards/chosen": -1.3380658626556396, "rewards/margins": 0.5778164267539978, "rewards/rejected": -1.9158824682235718, "step": 5030 }, { "epoch": 0.66, "eval_logits/chosen": -2.313192844390869, "eval_logits/rejected": -2.325887441635132, "eval_logps/chosen": -472.5576477050781, "eval_logps/rejected": -480.987060546875, "eval_loss": 0.6061822772026062, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -1.3980821371078491, "eval_rewards/margins": 0.40068814158439636, "eval_rewards/rejected": -1.7987704277038574, "eval_runtime": 196.8955, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 5030 }, { "epoch": 0.66, "learning_rate": 1.5670854432597433e-06, "logits/chosen": -2.4839038848876953, "logits/rejected": -2.4908900260925293, "logps/chosen": -514.7501220703125, "logps/rejected": -471.20782470703125, "loss": 0.6432, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.435762643814087, "rewards/margins": 0.2490122765302658, "rewards/rejected": -1.6847747564315796, "step": 5040 }, { "epoch": 0.66, "eval_logits/chosen": -2.3156557083129883, "eval_logits/rejected": -2.3286592960357666, "eval_logps/chosen": -468.1942443847656, "eval_logps/rejected": -475.9248046875, "eval_loss": 0.6063724160194397, "eval_rewards/accuracies": 0.6660000085830688, "eval_rewards/chosen": -1.3544481992721558, "eval_rewards/margins": 0.39369943737983704, "eval_rewards/rejected": -1.7481478452682495, "eval_runtime": 196.7985, "eval_samples_per_second": 10.163, "eval_steps_per_second": 5.081, "step": 5040 }, { "epoch": 0.66, "learning_rate": 1.556498000137104e-06, "logits/chosen": -2.40048885345459, "logits/rejected": -2.391714572906494, "logps/chosen": -435.9031677246094, "logps/rejected": -444.23785400390625, "loss": 0.5867, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.3554993867874146, "rewards/margins": 0.4399290084838867, "rewards/rejected": -1.7954285144805908, "step": 5050 }, { "epoch": 0.66, "eval_logits/chosen": -2.315324068069458, "eval_logits/rejected": -2.3286914825439453, "eval_logps/chosen": -465.41534423828125, "eval_logps/rejected": -472.6462707519531, "eval_loss": 0.6065265536308289, "eval_rewards/accuracies": 0.6660000085830688, "eval_rewards/chosen": -1.3266593217849731, "eval_rewards/margins": 0.3887033462524414, "eval_rewards/rejected": -1.715362787246704, "eval_runtime": 197.0067, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 5050 }, { "epoch": 0.66, "learning_rate": 1.5459302526536188e-06, "logits/chosen": -2.496645450592041, "logits/rejected": -2.4642739295959473, "logps/chosen": -450.39715576171875, "logps/rejected": -466.58416748046875, "loss": 0.6339, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.3087141513824463, "rewards/margins": 0.37976545095443726, "rewards/rejected": -1.6884794235229492, "step": 5060 }, { "epoch": 0.66, "eval_logits/chosen": -2.3121721744537354, "eval_logits/rejected": -2.325657844543457, "eval_logps/chosen": -464.2017822265625, "eval_logps/rejected": -471.1905822753906, "eval_loss": 0.6065158843994141, "eval_rewards/accuracies": 0.6675000190734863, "eval_rewards/chosen": -1.3145238161087036, "eval_rewards/margins": 0.3862822651863098, "eval_rewards/rejected": -1.7008060216903687, "eval_runtime": 196.9539, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 5060 }, { "epoch": 0.66, "learning_rate": 1.5353824214114075e-06, "logits/chosen": -2.6206235885620117, "logits/rejected": -2.6003384590148926, "logps/chosen": -457.868896484375, "logps/rejected": -479.43768310546875, "loss": 0.5926, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.3027828931808472, "rewards/margins": 0.39749962091445923, "rewards/rejected": -1.7002826929092407, "step": 5070 }, { "epoch": 0.66, "eval_logits/chosen": -2.3142552375793457, "eval_logits/rejected": -2.3274495601654053, "eval_logps/chosen": -464.9194030761719, "eval_logps/rejected": -471.9013977050781, "eval_loss": 0.6058085560798645, "eval_rewards/accuracies": 0.6664999723434448, "eval_rewards/chosen": -1.321699857711792, "eval_rewards/margins": 0.386214017868042, "eval_rewards/rejected": -1.7079139947891235, "eval_runtime": 197.0686, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.074, "step": 5070 }, { "epoch": 0.66, "learning_rate": 1.5248547265968373e-06, "logits/chosen": -2.583876371383667, "logits/rejected": -2.569124698638916, "logps/chosen": -426.1070861816406, "logps/rejected": -461.442626953125, "loss": 0.5513, "rewards/accuracies": 0.75, "rewards/chosen": -1.2332156896591187, "rewards/margins": 0.5031381845474243, "rewards/rejected": -1.7363536357879639, "step": 5080 }, { "epoch": 0.66, "eval_logits/chosen": -2.3098928928375244, "eval_logits/rejected": -2.3230464458465576, "eval_logps/chosen": -466.5625305175781, "eval_logps/rejected": -473.8684997558594, "eval_loss": 0.6062521934509277, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -1.338131070137024, "eval_rewards/margins": 0.3894534111022949, "eval_rewards/rejected": -1.7275844812393188, "eval_runtime": 196.9038, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 5080 }, { "epoch": 0.67, "learning_rate": 1.5143473879759265e-06, "logits/chosen": -2.5847601890563965, "logits/rejected": -2.500302791595459, "logps/chosen": -431.3771057128906, "logps/rejected": -438.20379638671875, "loss": 0.5409, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2843835353851318, "rewards/margins": 0.6454133987426758, "rewards/rejected": -1.929796814918518, "step": 5090 }, { "epoch": 0.67, "eval_logits/chosen": -2.3079354763031006, "eval_logits/rejected": -2.320760488510132, "eval_logps/chosen": -468.5992431640625, "eval_logps/rejected": -476.30364990234375, "eval_loss": 0.6059185266494751, "eval_rewards/accuracies": 0.6669999957084656, "eval_rewards/chosen": -1.358498454093933, "eval_rewards/margins": 0.3934376835823059, "eval_rewards/rejected": -1.7519360780715942, "eval_runtime": 197.2696, "eval_samples_per_second": 10.138, "eval_steps_per_second": 5.069, "step": 5090 }, { "epoch": 0.67, "learning_rate": 1.5038606248897586e-06, "logits/chosen": -2.519559144973755, "logits/rejected": -2.530374050140381, "logps/chosen": -504.1170959472656, "logps/rejected": -497.4813537597656, "loss": 0.6739, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.510613203048706, "rewards/margins": 0.20000800490379333, "rewards/rejected": -1.7106212377548218, "step": 5100 }, { "epoch": 0.67, "eval_logits/chosen": -2.3100926876068115, "eval_logits/rejected": -2.3227438926696777, "eval_logps/chosen": -469.88934326171875, "eval_logps/rejected": -477.7745666503906, "eval_loss": 0.6048146486282349, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -1.3713992834091187, "eval_rewards/margins": 0.3952457904815674, "eval_rewards/rejected": -1.7666451930999756, "eval_runtime": 197.3203, "eval_samples_per_second": 10.136, "eval_steps_per_second": 5.068, "step": 5100 }, { "epoch": 0.67, "learning_rate": 1.4933946562499008e-06, "logits/chosen": -2.4187283515930176, "logits/rejected": -2.424403667449951, "logps/chosen": -458.12347412109375, "logps/rejected": -449.3030700683594, "loss": 0.625, "rewards/accuracies": 0.625, "rewards/chosen": -1.3385467529296875, "rewards/margins": 0.34443196654319763, "rewards/rejected": -1.682978868484497, "step": 5110 }, { "epoch": 0.67, "eval_logits/chosen": -2.309351921081543, "eval_logits/rejected": -2.3218774795532227, "eval_logps/chosen": -469.4795227050781, "eval_logps/rejected": -477.3433837890625, "eval_loss": 0.6042229533195496, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -1.3673009872436523, "eval_rewards/margins": 0.39503201842308044, "eval_rewards/rejected": -1.7623330354690552, "eval_runtime": 196.9639, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 5110 }, { "epoch": 0.67, "learning_rate": 1.482949700533835e-06, "logits/chosen": -2.388120174407959, "logits/rejected": -2.3988916873931885, "logps/chosen": -408.99066162109375, "logps/rejected": -426.799560546875, "loss": 0.5985, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.331207036972046, "rewards/margins": 0.37625521421432495, "rewards/rejected": -1.7074623107910156, "step": 5120 }, { "epoch": 0.67, "eval_logits/chosen": -2.307891845703125, "eval_logits/rejected": -2.320222854614258, "eval_logps/chosen": -467.59051513671875, "eval_logps/rejected": -475.2369079589844, "eval_loss": 0.6042217016220093, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -1.348410725593567, "eval_rewards/margins": 0.39285799860954285, "eval_rewards/rejected": -1.7412687540054321, "eval_runtime": 196.9823, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 5120 }, { "epoch": 0.67, "learning_rate": 1.4725259757803983e-06, "logits/chosen": -2.6179652214050293, "logits/rejected": -2.5962462425231934, "logps/chosen": -518.4244995117188, "logps/rejected": -508.89996337890625, "loss": 0.5549, "rewards/accuracies": 0.75, "rewards/chosen": -1.2763904333114624, "rewards/margins": 0.5201258659362793, "rewards/rejected": -1.7965164184570312, "step": 5130 }, { "epoch": 0.67, "eval_logits/chosen": -2.306852340698242, "eval_logits/rejected": -2.3192129135131836, "eval_logps/chosen": -468.34869384765625, "eval_logps/rejected": -476.2039489746094, "eval_loss": 0.6043887734413147, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.3559925556182861, "eval_rewards/margins": 0.39494654536247253, "eval_rewards/rejected": -1.7509392499923706, "eval_runtime": 197.059, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 5130 }, { "epoch": 0.67, "learning_rate": 1.4621236995852314e-06, "logits/chosen": -2.6084470748901367, "logits/rejected": -2.593048095703125, "logps/chosen": -468.4862365722656, "logps/rejected": -494.2373962402344, "loss": 0.538, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3402001857757568, "rewards/margins": 0.5544275045394897, "rewards/rejected": -1.8946278095245361, "step": 5140 }, { "epoch": 0.67, "eval_logits/chosen": -2.3012852668762207, "eval_logits/rejected": -2.313300132751465, "eval_logps/chosen": -470.0094909667969, "eval_logps/rejected": -478.3188171386719, "eval_loss": 0.6046092510223389, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -1.372600793838501, "eval_rewards/margins": 0.3994869589805603, "eval_rewards/rejected": -1.772087812423706, "eval_runtime": 197.0317, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 5140 }, { "epoch": 0.67, "learning_rate": 1.4517430890962337e-06, "logits/chosen": -2.5578713417053223, "logits/rejected": -2.462035894393921, "logps/chosen": -484.83935546875, "logps/rejected": -417.79461669921875, "loss": 0.5572, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.3011386394500732, "rewards/margins": 0.4909875988960266, "rewards/rejected": -1.7921262979507446, "step": 5150 }, { "epoch": 0.67, "eval_logits/chosen": -2.3006138801574707, "eval_logits/rejected": -2.3119466304779053, "eval_logps/chosen": -471.7322082519531, "eval_logps/rejected": -480.45635986328125, "eval_loss": 0.6042333245277405, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -1.389828085899353, "eval_rewards/margins": 0.4036352038383484, "eval_rewards/rejected": -1.7934633493423462, "eval_runtime": 197.062, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 5150 }, { "epoch": 0.68, "learning_rate": 1.4413843610090342e-06, "logits/chosen": -2.559861183166504, "logits/rejected": -2.483541488647461, "logps/chosen": -505.0181579589844, "logps/rejected": -504.79815673828125, "loss": 0.6035, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4536011219024658, "rewards/margins": 0.45139384269714355, "rewards/rejected": -1.9049949645996094, "step": 5160 }, { "epoch": 0.68, "eval_logits/chosen": -2.3040931224823, "eval_logits/rejected": -2.3148891925811768, "eval_logps/chosen": -473.65966796875, "eval_logps/rejected": -482.7805480957031, "eval_loss": 0.6035750508308411, "eval_rewards/accuracies": 0.6710000038146973, "eval_rewards/chosen": -1.4091025590896606, "eval_rewards/margins": 0.4076029360294342, "eval_rewards/rejected": -1.816705584526062, "eval_runtime": 197.095, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.074, "step": 5160 }, { "epoch": 0.68, "learning_rate": 1.4310477315624637e-06, "logits/chosen": -2.513333797454834, "logits/rejected": -2.5067684650421143, "logps/chosen": -457.77783203125, "logps/rejected": -470.79052734375, "loss": 0.6602, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.455190896987915, "rewards/margins": 0.28036192059516907, "rewards/rejected": -1.7355530261993408, "step": 5170 }, { "epoch": 0.68, "eval_logits/chosen": -2.308779001235962, "eval_logits/rejected": -2.319303274154663, "eval_logps/chosen": -469.47418212890625, "eval_logps/rejected": -478.0995178222656, "eval_loss": 0.6027604937553406, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -1.3672480583190918, "eval_rewards/margins": 0.4026472270488739, "eval_rewards/rejected": -1.769895315170288, "eval_runtime": 197.0407, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 5170 }, { "epoch": 0.68, "learning_rate": 1.420733416534045e-06, "logits/chosen": -2.38897442817688, "logits/rejected": -2.3405518531799316, "logps/chosen": -443.81549072265625, "logps/rejected": -463.55303955078125, "loss": 0.6586, "rewards/accuracies": 0.6875, "rewards/chosen": -1.3974109888076782, "rewards/margins": 0.30270710587501526, "rewards/rejected": -1.700118064880371, "step": 5180 }, { "epoch": 0.68, "eval_logits/chosen": -2.3164751529693604, "eval_logits/rejected": -2.327291488647461, "eval_logps/chosen": -464.68194580078125, "eval_logps/rejected": -472.7758483886719, "eval_loss": 0.6024616360664368, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -1.3193248510360718, "eval_rewards/margins": 0.3973331153392792, "eval_rewards/rejected": -1.7166579961776733, "eval_runtime": 197.0127, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 5180 }, { "epoch": 0.68, "learning_rate": 1.410441631235487e-06, "logits/chosen": -2.5416388511657715, "logits/rejected": -2.523131847381592, "logps/chosen": -464.9375, "logps/rejected": -487.29638671875, "loss": 0.602, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.2932493686676025, "rewards/margins": 0.3969052731990814, "rewards/rejected": -1.6901544332504272, "step": 5190 }, { "epoch": 0.68, "eval_logits/chosen": -2.320040464401245, "eval_logits/rejected": -2.330761432647705, "eval_logps/chosen": -463.5137634277344, "eval_logps/rejected": -471.38946533203125, "eval_loss": 0.6024397015571594, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -1.3076434135437012, "eval_rewards/margins": 0.3951510787010193, "eval_rewards/rejected": -1.7027945518493652, "eval_runtime": 197.001, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 5190 }, { "epoch": 0.68, "learning_rate": 1.4001725905081868e-06, "logits/chosen": -2.5292303562164307, "logits/rejected": -2.511136531829834, "logps/chosen": -422.4544982910156, "logps/rejected": -407.1893310546875, "loss": 0.5896, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.3296085596084595, "rewards/margins": 0.3973914682865143, "rewards/rejected": -1.7269999980926514, "step": 5200 }, { "epoch": 0.68, "eval_logits/chosen": -2.3227202892303467, "eval_logits/rejected": -2.333261251449585, "eval_logps/chosen": -462.600830078125, "eval_logps/rejected": -470.4217224121094, "eval_loss": 0.6021357178688049, "eval_rewards/accuracies": 0.6679999828338623, "eval_rewards/chosen": -1.2985141277313232, "eval_rewards/margins": 0.3946027457714081, "eval_rewards/rejected": -1.6931169033050537, "eval_runtime": 196.6463, "eval_samples_per_second": 10.171, "eval_steps_per_second": 5.085, "step": 5200 }, { "epoch": 0.68, "learning_rate": 1.3899265087187507e-06, "logits/chosen": -2.5664708614349365, "logits/rejected": -2.5287675857543945, "logps/chosen": -410.075439453125, "logps/rejected": -426.7511291503906, "loss": 0.5838, "rewards/accuracies": 0.6875, "rewards/chosen": -1.2405273914337158, "rewards/margins": 0.39256519079208374, "rewards/rejected": -1.6330926418304443, "step": 5210 }, { "epoch": 0.68, "eval_logits/chosen": -2.322103977203369, "eval_logits/rejected": -2.3327839374542236, "eval_logps/chosen": -461.9039306640625, "eval_logps/rejected": -469.6353759765625, "eval_loss": 0.6023078560829163, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -1.2915451526641846, "eval_rewards/margins": 0.39370810985565186, "eval_rewards/rejected": -1.6852531433105469, "eval_runtime": 196.8621, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.08, "step": 5210 }, { "epoch": 0.68, "learning_rate": 1.3797035997545144e-06, "logits/chosen": -2.5763635635375977, "logits/rejected": -2.527101993560791, "logps/chosen": -473.539794921875, "logps/rejected": -478.39569091796875, "loss": 0.5508, "rewards/accuracies": 0.6875, "rewards/chosen": -1.1910579204559326, "rewards/margins": 0.48212796449661255, "rewards/rejected": -1.67318594455719, "step": 5220 }, { "epoch": 0.68, "eval_logits/chosen": -2.3128867149353027, "eval_logits/rejected": -2.323371410369873, "eval_logps/chosen": -464.56024169921875, "eval_logps/rejected": -472.73760986328125, "eval_loss": 0.6023849844932556, "eval_rewards/accuracies": 0.6710000038146973, "eval_rewards/chosen": -1.318108320236206, "eval_rewards/margins": 0.3981679081916809, "eval_rewards/rejected": -1.7162760496139526, "eval_runtime": 196.9084, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 5220 }, { "epoch": 0.68, "learning_rate": 1.3695040770190816e-06, "logits/chosen": -2.554281234741211, "logits/rejected": -2.5586276054382324, "logps/chosen": -431.57958984375, "logps/rejected": -451.42913818359375, "loss": 0.6031, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.2992688417434692, "rewards/margins": 0.37253737449645996, "rewards/rejected": -1.6718060970306396, "step": 5230 }, { "epoch": 0.68, "eval_logits/chosen": -2.3034961223602295, "eval_logits/rejected": -2.3134658336639404, "eval_logps/chosen": -468.2120056152344, "eval_logps/rejected": -476.7961730957031, "eval_loss": 0.6026508808135986, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -1.354626178741455, "eval_rewards/margins": 0.402235746383667, "eval_rewards/rejected": -1.756861925125122, "eval_runtime": 197.2352, "eval_samples_per_second": 10.14, "eval_steps_per_second": 5.07, "step": 5230 }, { "epoch": 0.69, "learning_rate": 1.3593281534278651e-06, "logits/chosen": -2.472536563873291, "logits/rejected": -2.5110905170440674, "logps/chosen": -414.9605407714844, "logps/rejected": -476.60076904296875, "loss": 0.5353, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.293400526046753, "rewards/margins": 0.5274486541748047, "rewards/rejected": -1.8208494186401367, "step": 5240 }, { "epoch": 0.69, "eval_logits/chosen": -2.300142526626587, "eval_logits/rejected": -2.3098344802856445, "eval_logps/chosen": -471.1337585449219, "eval_logps/rejected": -479.9906005859375, "eval_loss": 0.6028639078140259, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -1.3838437795639038, "eval_rewards/margins": 0.4049619436264038, "eval_rewards/rejected": -1.7888059616088867, "eval_runtime": 197.0056, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 5240 }, { "epoch": 0.69, "learning_rate": 1.3491760414036478e-06, "logits/chosen": -2.4985485076904297, "logits/rejected": -2.4522864818573, "logps/chosen": -497.62725830078125, "logps/rejected": -464.302978515625, "loss": 0.6128, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.3614141941070557, "rewards/margins": 0.38570067286491394, "rewards/rejected": -1.7471147775650024, "step": 5250 }, { "epoch": 0.69, "eval_logits/chosen": -2.2994742393493652, "eval_logits/rejected": -2.3090596199035645, "eval_logps/chosen": -471.99444580078125, "eval_logps/rejected": -481.0536193847656, "eval_loss": 0.6028826832771301, "eval_rewards/accuracies": 0.6710000038146973, "eval_rewards/chosen": -1.3924506902694702, "eval_rewards/margins": 0.4069855213165283, "eval_rewards/rejected": -1.799436330795288, "eval_runtime": 197.0298, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 5250 }, { "epoch": 0.69, "learning_rate": 1.3390479528721444e-06, "logits/chosen": -2.4176363945007324, "logits/rejected": -2.4405970573425293, "logps/chosen": -457.1136779785156, "logps/rejected": -496.7222595214844, "loss": 0.6085, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4070097208023071, "rewards/margins": 0.40776365995407104, "rewards/rejected": -1.8147733211517334, "step": 5260 }, { "epoch": 0.69, "eval_logits/chosen": -2.300313711166382, "eval_logits/rejected": -2.3098363876342773, "eval_logps/chosen": -472.0662536621094, "eval_logps/rejected": -481.25726318359375, "eval_loss": 0.6027334928512573, "eval_rewards/accuracies": 0.6664999723434448, "eval_rewards/chosen": -1.393168330192566, "eval_rewards/margins": 0.4083041250705719, "eval_rewards/rejected": -1.8014723062515259, "eval_runtime": 197.0593, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 5260 }, { "epoch": 0.69, "learning_rate": 1.3289440992575756e-06, "logits/chosen": -2.5740933418273926, "logits/rejected": -2.5310654640197754, "logps/chosen": -502.00408935546875, "logps/rejected": -504.9661560058594, "loss": 0.567, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.3182388544082642, "rewards/margins": 0.45366114377975464, "rewards/rejected": -1.771899938583374, "step": 5270 }, { "epoch": 0.69, "eval_logits/chosen": -2.2995877265930176, "eval_logits/rejected": -2.309088706970215, "eval_logps/chosen": -471.35662841796875, "eval_logps/rejected": -480.6314392089844, "eval_loss": 0.6026535034179688, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -1.386072039604187, "eval_rewards/margins": 0.40914198756217957, "eval_rewards/rejected": -1.795214056968689, "eval_runtime": 196.9223, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 5270 }, { "epoch": 0.69, "learning_rate": 1.3188646914782616e-06, "logits/chosen": -2.597381353378296, "logits/rejected": -2.5523602962493896, "logps/chosen": -549.8211669921875, "logps/rejected": -480.4268493652344, "loss": 0.5273, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4053056240081787, "rewards/margins": 0.5496398210525513, "rewards/rejected": -1.9549453258514404, "step": 5280 }, { "epoch": 0.69, "eval_logits/chosen": -2.29966402053833, "eval_logits/rejected": -2.3090415000915527, "eval_logps/chosen": -472.23272705078125, "eval_logps/rejected": -481.6187438964844, "eval_loss": 0.6027253270149231, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -1.3948334455490112, "eval_rewards/margins": 0.41025370359420776, "eval_rewards/rejected": -1.8050872087478638, "eval_runtime": 196.8822, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 5280 }, { "epoch": 0.69, "learning_rate": 1.3088099399422109e-06, "logits/chosen": -2.586010456085205, "logits/rejected": -2.5378670692443848, "logps/chosen": -488.80267333984375, "logps/rejected": -491.5059509277344, "loss": 0.6205, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.334667682647705, "rewards/margins": 0.39315497875213623, "rewards/rejected": -1.7278226613998413, "step": 5290 }, { "epoch": 0.69, "eval_logits/chosen": -2.3015239238739014, "eval_logits/rejected": -2.310614824295044, "eval_logps/chosen": -471.9217834472656, "eval_logps/rejected": -481.25775146484375, "eval_loss": 0.6026984453201294, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -1.391723871231079, "eval_rewards/margins": 0.40975335240364075, "eval_rewards/rejected": -1.8014771938323975, "eval_runtime": 196.9474, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 5290 }, { "epoch": 0.69, "learning_rate": 1.2987800545427353e-06, "logits/chosen": -2.566490650177002, "logits/rejected": -2.4875643253326416, "logps/chosen": -482.4261169433594, "logps/rejected": -494.2449645996094, "loss": 0.5601, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.377469778060913, "rewards/margins": 0.5372947454452515, "rewards/rejected": -1.914764404296875, "step": 5300 }, { "epoch": 0.69, "eval_logits/chosen": -2.3011436462402344, "eval_logits/rejected": -2.3103692531585693, "eval_logps/chosen": -471.6266174316406, "eval_logps/rejected": -481.00421142578125, "eval_loss": 0.6028599739074707, "eval_rewards/accuracies": 0.6700000166893005, "eval_rewards/chosen": -1.3887721300125122, "eval_rewards/margins": 0.4101700484752655, "eval_rewards/rejected": -1.7989420890808105, "eval_runtime": 196.6641, "eval_samples_per_second": 10.17, "eval_steps_per_second": 5.085, "step": 5300 }, { "epoch": 0.69, "learning_rate": 1.288775244654062e-06, "logits/chosen": -2.5995917320251465, "logits/rejected": -2.578198194503784, "logps/chosen": -530.6082153320312, "logps/rejected": -501.57861328125, "loss": 0.6486, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.3748692274093628, "rewards/margins": 0.3246624767780304, "rewards/rejected": -1.6995317935943604, "step": 5310 }, { "epoch": 0.69, "eval_logits/chosen": -2.303572654724121, "eval_logits/rejected": -2.3130619525909424, "eval_logps/chosen": -470.4731140136719, "eval_logps/rejected": -479.8280334472656, "eval_loss": 0.6028394103050232, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -1.3772375583648682, "eval_rewards/margins": 0.4099426567554474, "eval_rewards/rejected": -1.7871803045272827, "eval_runtime": 196.8546, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 5310 }, { "epoch": 0.7, "learning_rate": 1.2787957191269696e-06, "logits/chosen": -2.4609122276306152, "logits/rejected": -2.4693045616149902, "logps/chosen": -468.830322265625, "logps/rejected": -495.522216796875, "loss": 0.6643, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.389495611190796, "rewards/margins": 0.28900545835494995, "rewards/rejected": -1.6785008907318115, "step": 5320 }, { "epoch": 0.7, "eval_logits/chosen": -2.305281400680542, "eval_logits/rejected": -2.3151094913482666, "eval_logps/chosen": -466.90655517578125, "eval_logps/rejected": -475.7928771972656, "eval_loss": 0.6025946140289307, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -1.341571569442749, "eval_rewards/margins": 0.4052570164203644, "eval_rewards/rejected": -1.7468284368515015, "eval_runtime": 196.8357, "eval_samples_per_second": 10.161, "eval_steps_per_second": 5.08, "step": 5320 }, { "epoch": 0.7, "learning_rate": 1.2688416862844193e-06, "logits/chosen": -2.4436516761779785, "logits/rejected": -2.497119426727295, "logps/chosen": -410.174072265625, "logps/rejected": -484.532470703125, "loss": 0.5421, "rewards/accuracies": 0.6875, "rewards/chosen": -1.1837115287780762, "rewards/margins": 0.5746269822120667, "rewards/rejected": -1.7583385705947876, "step": 5330 }, { "epoch": 0.7, "eval_logits/chosen": -2.3095591068267822, "eval_logits/rejected": -2.319445848464966, "eval_logps/chosen": -464.5736083984375, "eval_logps/rejected": -473.2466125488281, "eval_loss": 0.6024113893508911, "eval_rewards/accuracies": 0.671500027179718, "eval_rewards/chosen": -1.3182419538497925, "eval_rewards/margins": 0.40312403440475464, "eval_rewards/rejected": -1.7213659286499023, "eval_runtime": 196.7266, "eval_samples_per_second": 10.166, "eval_steps_per_second": 5.083, "step": 5330 }, { "epoch": 0.7, "learning_rate": 1.2589133539172193e-06, "logits/chosen": -2.6252217292785645, "logits/rejected": -2.5867104530334473, "logps/chosen": -479.29510498046875, "logps/rejected": -485.63214111328125, "loss": 0.5113, "rewards/accuracies": 0.75, "rewards/chosen": -1.0865707397460938, "rewards/margins": 0.601614773273468, "rewards/rejected": -1.688185453414917, "step": 5340 }, { "epoch": 0.7, "eval_logits/chosen": -2.302642345428467, "eval_logits/rejected": -2.312276840209961, "eval_logps/chosen": -468.6505432128906, "eval_logps/rejected": -478.03350830078125, "eval_loss": 0.602563738822937, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -1.3590114116668701, "eval_rewards/margins": 0.4102230370044708, "eval_rewards/rejected": -1.7692344188690186, "eval_runtime": 196.8303, "eval_samples_per_second": 10.161, "eval_steps_per_second": 5.081, "step": 5340 }, { "epoch": 0.7, "learning_rate": 1.249010929279672e-06, "logits/chosen": -2.6182000637054443, "logits/rejected": -2.5885214805603027, "logps/chosen": -475.0232849121094, "logps/rejected": -491.6460876464844, "loss": 0.6035, "rewards/accuracies": 0.625, "rewards/chosen": -1.3365066051483154, "rewards/margins": 0.3971422016620636, "rewards/rejected": -1.7336488962173462, "step": 5350 }, { "epoch": 0.7, "eval_logits/chosen": -2.3038649559020996, "eval_logits/rejected": -2.3131213188171387, "eval_logps/chosen": -471.2071228027344, "eval_logps/rejected": -480.9702453613281, "eval_loss": 0.6023095846176147, "eval_rewards/accuracies": 0.6664999723434448, "eval_rewards/chosen": -1.3845771551132202, "eval_rewards/margins": 0.41402512788772583, "eval_rewards/rejected": -1.7986023426055908, "eval_runtime": 196.8981, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 5350 }, { "epoch": 0.7, "learning_rate": 1.2391346190852603e-06, "logits/chosen": -2.604792833328247, "logits/rejected": -2.582808017730713, "logps/chosen": -467.715087890625, "logps/rejected": -480.80731201171875, "loss": 0.624, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4067754745483398, "rewards/margins": 0.4088035225868225, "rewards/rejected": -1.8155790567398071, "step": 5360 }, { "epoch": 0.7, "eval_logits/chosen": -2.2990853786468506, "eval_logits/rejected": -2.308011293411255, "eval_logps/chosen": -474.9680480957031, "eval_logps/rejected": -485.0401916503906, "eval_loss": 0.6023436188697815, "eval_rewards/accuracies": 0.6675000190734863, "eval_rewards/chosen": -1.4221864938735962, "eval_rewards/margins": 0.4171146750450134, "eval_rewards/rejected": -1.8393012285232544, "eval_runtime": 196.9161, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.078, "step": 5360 }, { "epoch": 0.7, "learning_rate": 1.2292846295023222e-06, "logits/chosen": -2.5381789207458496, "logits/rejected": -2.5520262718200684, "logps/chosen": -516.1911010742188, "logps/rejected": -499.8275451660156, "loss": 0.6991, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -1.5526403188705444, "rewards/margins": 0.15553751587867737, "rewards/rejected": -1.708177924156189, "step": 5370 }, { "epoch": 0.7, "eval_logits/chosen": -2.297722578048706, "eval_logits/rejected": -2.3069052696228027, "eval_logps/chosen": -473.8943786621094, "eval_logps/rejected": -483.74273681640625, "eval_loss": 0.6021169424057007, "eval_rewards/accuracies": 0.6679999828338623, "eval_rewards/chosen": -1.4114493131637573, "eval_rewards/margins": 0.4148778021335602, "eval_rewards/rejected": -1.8263272047042847, "eval_runtime": 196.8862, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 5370 }, { "epoch": 0.7, "learning_rate": 1.2194611661497576e-06, "logits/chosen": -2.432284355163574, "logits/rejected": -2.4482924938201904, "logps/chosen": -470.35955810546875, "logps/rejected": -488.89324951171875, "loss": 0.6203, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4455724954605103, "rewards/margins": 0.3604838252067566, "rewards/rejected": -1.8060563802719116, "step": 5380 }, { "epoch": 0.7, "eval_logits/chosen": -2.299294948577881, "eval_logits/rejected": -2.308401346206665, "eval_logps/chosen": -474.36767578125, "eval_logps/rejected": -484.0581970214844, "eval_loss": 0.6022467613220215, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -1.4161828756332397, "eval_rewards/margins": 0.4132993519306183, "eval_rewards/rejected": -1.829482078552246, "eval_runtime": 197.0801, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 5380 }, { "epoch": 0.71, "learning_rate": 1.2096644340927247e-06, "logits/chosen": -2.5367202758789062, "logits/rejected": -2.546861171722412, "logps/chosen": -488.73858642578125, "logps/rejected": -516.7587890625, "loss": 0.5684, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3687140941619873, "rewards/margins": 0.5021928548812866, "rewards/rejected": -1.8709068298339844, "step": 5390 }, { "epoch": 0.71, "eval_logits/chosen": -2.299215316772461, "eval_logits/rejected": -2.3083655834198, "eval_logps/chosen": -474.4788818359375, "eval_logps/rejected": -484.1181640625, "eval_loss": 0.6022253632545471, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -1.4172947406768799, "eval_rewards/margins": 0.412786602973938, "eval_rewards/rejected": -1.8300813436508179, "eval_runtime": 197.0778, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 5390 }, { "epoch": 0.71, "learning_rate": 1.19989463783837e-06, "logits/chosen": -2.606667995452881, "logits/rejected": -2.5329880714416504, "logps/chosen": -507.55950927734375, "logps/rejected": -529.9376220703125, "loss": 0.5489, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3215059041976929, "rewards/margins": 0.5542451739311218, "rewards/rejected": -1.8757511377334595, "step": 5400 }, { "epoch": 0.71, "eval_logits/chosen": -2.3006458282470703, "eval_logits/rejected": -2.3096537590026855, "eval_logps/chosen": -475.47930908203125, "eval_logps/rejected": -485.2168273925781, "eval_loss": 0.6021424531936646, "eval_rewards/accuracies": 0.6700000166893005, "eval_rewards/chosen": -1.4272990226745605, "eval_rewards/margins": 0.4137687385082245, "eval_rewards/rejected": -1.841067910194397, "eval_runtime": 196.93, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 5400 }, { "epoch": 0.71, "learning_rate": 1.1901519813315495e-06, "logits/chosen": -2.4493112564086914, "logits/rejected": -2.4181106090545654, "logps/chosen": -454.238525390625, "logps/rejected": -464.66278076171875, "loss": 0.6004, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4211333990097046, "rewards/margins": 0.36835595965385437, "rewards/rejected": -1.7894893884658813, "step": 5410 }, { "epoch": 0.71, "eval_logits/chosen": -2.298659563064575, "eval_logits/rejected": -2.3075058460235596, "eval_logps/chosen": -477.7009582519531, "eval_logps/rejected": -487.77276611328125, "eval_loss": 0.6020307540893555, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -1.4495152235031128, "eval_rewards/margins": 0.41711264848709106, "eval_rewards/rejected": -1.8666279315948486, "eval_runtime": 196.8669, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.08, "step": 5410 }, { "epoch": 0.71, "learning_rate": 1.1804366679505798e-06, "logits/chosen": -2.4779162406921387, "logits/rejected": -2.447110891342163, "logps/chosen": -510.69012451171875, "logps/rejected": -487.569580078125, "loss": 0.5903, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.481211543083191, "rewards/margins": 0.45118799805641174, "rewards/rejected": -1.9323995113372803, "step": 5420 }, { "epoch": 0.71, "eval_logits/chosen": -2.298388719558716, "eval_logits/rejected": -2.3070218563079834, "eval_logps/chosen": -479.8644714355469, "eval_logps/rejected": -490.0103454589844, "eval_loss": 0.6022910475730896, "eval_rewards/accuracies": 0.6710000038146973, "eval_rewards/chosen": -1.4711503982543945, "eval_rewards/margins": 0.4178526699542999, "eval_rewards/rejected": -1.889003038406372, "eval_runtime": 197.0663, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.074, "step": 5420 }, { "epoch": 0.71, "learning_rate": 1.1707489005029877e-06, "logits/chosen": -2.521374225616455, "logits/rejected": -2.524177074432373, "logps/chosen": -473.6175842285156, "logps/rejected": -499.723876953125, "loss": 0.6109, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4926834106445312, "rewards/margins": 0.5293210744857788, "rewards/rejected": -2.0220046043395996, "step": 5430 }, { "epoch": 0.71, "eval_logits/chosen": -2.296948194503784, "eval_logits/rejected": -2.3053503036499023, "eval_logps/chosen": -481.6418151855469, "eval_logps/rejected": -492.03350830078125, "eval_loss": 0.6023349165916443, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -1.4889241456985474, "eval_rewards/margins": 0.4203101098537445, "eval_rewards/rejected": -1.9092342853546143, "eval_runtime": 196.8203, "eval_samples_per_second": 10.162, "eval_steps_per_second": 5.081, "step": 5430 }, { "epoch": 0.71, "learning_rate": 1.1610888812212749e-06, "logits/chosen": -2.4720962047576904, "logits/rejected": -2.4360768795013428, "logps/chosen": -490.5087890625, "logps/rejected": -482.7325134277344, "loss": 0.6214, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.5258370637893677, "rewards/margins": 0.32655078172683716, "rewards/rejected": -1.8523876667022705, "step": 5440 }, { "epoch": 0.71, "eval_logits/chosen": -2.2956690788269043, "eval_logits/rejected": -2.3042073249816895, "eval_logps/chosen": -481.92974853515625, "eval_logps/rejected": -492.52020263671875, "eval_loss": 0.6022093892097473, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -1.4918036460876465, "eval_rewards/margins": 0.4222985506057739, "eval_rewards/rejected": -1.91410231590271, "eval_runtime": 196.9069, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 5440 }, { "epoch": 0.71, "learning_rate": 1.1514568117587035e-06, "logits/chosen": -2.538889169692993, "logits/rejected": -2.563322067260742, "logps/chosen": -498.38079833984375, "logps/rejected": -502.63726806640625, "loss": 0.6564, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.6392685174942017, "rewards/margins": 0.24687933921813965, "rewards/rejected": -1.8861478567123413, "step": 5450 }, { "epoch": 0.71, "eval_logits/chosen": -2.2915148735046387, "eval_logits/rejected": -2.30027437210083, "eval_logps/chosen": -482.7044372558594, "eval_logps/rejected": -493.3853759765625, "eval_loss": 0.6024051308631897, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -1.4995503425598145, "eval_rewards/margins": 0.4232032299041748, "eval_rewards/rejected": -1.9227536916732788, "eval_runtime": 196.9898, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.076, "step": 5450 }, { "epoch": 0.71, "learning_rate": 1.1418528931850781e-06, "logits/chosen": -2.5654962062835693, "logits/rejected": -2.4673209190368652, "logps/chosen": -489.5174865722656, "logps/rejected": -485.2567443847656, "loss": 0.5649, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4588682651519775, "rewards/margins": 0.5729068517684937, "rewards/rejected": -2.0317752361297607, "step": 5460 }, { "epoch": 0.71, "eval_logits/chosen": -2.287652015686035, "eval_logits/rejected": -2.2963643074035645, "eval_logps/chosen": -483.6240234375, "eval_logps/rejected": -494.3249206542969, "eval_loss": 0.6025742888450623, "eval_rewards/accuracies": 0.671500027179718, "eval_rewards/chosen": -1.5087462663650513, "eval_rewards/margins": 0.4234027564525604, "eval_rewards/rejected": -1.9321489334106445, "eval_runtime": 197.2711, "eval_samples_per_second": 10.138, "eval_steps_per_second": 5.069, "step": 5460 }, { "epoch": 0.72, "learning_rate": 1.1322773259825563e-06, "logits/chosen": -2.49501371383667, "logits/rejected": -2.4475762844085693, "logps/chosen": -479.83837890625, "logps/rejected": -441.84918212890625, "loss": 0.5814, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4474797248840332, "rewards/margins": 0.4089323580265045, "rewards/rejected": -1.8564122915267944, "step": 5470 }, { "epoch": 0.72, "eval_logits/chosen": -2.282066822052002, "eval_logits/rejected": -2.2901947498321533, "eval_logps/chosen": -486.34967041015625, "eval_logps/rejected": -497.11529541015625, "eval_loss": 0.602572500705719, "eval_rewards/accuracies": 0.6700000166893005, "eval_rewards/chosen": -1.536002278327942, "eval_rewards/margins": 0.42405039072036743, "eval_rewards/rejected": -1.9600528478622437, "eval_runtime": 197.1192, "eval_samples_per_second": 10.146, "eval_steps_per_second": 5.073, "step": 5470 }, { "epoch": 0.72, "learning_rate": 1.1227303100414552e-06, "logits/chosen": -2.4446446895599365, "logits/rejected": -2.4898505210876465, "logps/chosen": -435.85498046875, "logps/rejected": -499.18853759765625, "loss": 0.5398, "rewards/accuracies": 0.75, "rewards/chosen": -1.460822582244873, "rewards/margins": 0.5744005441665649, "rewards/rejected": -2.0352234840393066, "step": 5480 }, { "epoch": 0.72, "eval_logits/chosen": -2.2764456272125244, "eval_logits/rejected": -2.2844159603118896, "eval_logps/chosen": -489.5582275390625, "eval_logps/rejected": -500.5900573730469, "eval_loss": 0.6028984785079956, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.5680886507034302, "eval_rewards/margins": 0.4267115294933319, "eval_rewards/rejected": -1.994800090789795, "eval_runtime": 197.1388, "eval_samples_per_second": 10.145, "eval_steps_per_second": 5.073, "step": 5480 }, { "epoch": 0.72, "learning_rate": 1.113212044656087e-06, "logits/chosen": -2.4338154792785645, "logits/rejected": -2.4598872661590576, "logps/chosen": -453.0787048339844, "logps/rejected": -505.65850830078125, "loss": 0.6122, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.5571606159210205, "rewards/margins": 0.4209592342376709, "rewards/rejected": -1.9781198501586914, "step": 5490 }, { "epoch": 0.72, "eval_logits/chosen": -2.277843713760376, "eval_logits/rejected": -2.285881996154785, "eval_logps/chosen": -490.07916259765625, "eval_logps/rejected": -501.286376953125, "eval_loss": 0.6028754711151123, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.573297381401062, "eval_rewards/margins": 0.42846596240997314, "eval_rewards/rejected": -2.001763343811035, "eval_runtime": 197.1379, "eval_samples_per_second": 10.145, "eval_steps_per_second": 5.073, "step": 5490 }, { "epoch": 0.72, "learning_rate": 1.1037227285205951e-06, "logits/chosen": -2.3397364616394043, "logits/rejected": -2.409205675125122, "logps/chosen": -492.1761779785156, "logps/rejected": -530.21923828125, "loss": 0.6479, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.6842491626739502, "rewards/margins": 0.386981338262558, "rewards/rejected": -2.07123064994812, "step": 5500 }, { "epoch": 0.72, "eval_logits/chosen": -2.2736318111419678, "eval_logits/rejected": -2.2816479206085205, "eval_logps/chosen": -492.9660339355469, "eval_logps/rejected": -504.3878173828125, "eval_loss": 0.6026748418807983, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.6021665334701538, "eval_rewards/margins": 0.4306114614009857, "eval_rewards/rejected": -2.032778024673462, "eval_runtime": 197.0317, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 5500 }, { "epoch": 0.72, "learning_rate": 1.0942625597248028e-06, "logits/chosen": -2.430037021636963, "logits/rejected": -2.4081974029541016, "logps/chosen": -472.9207458496094, "logps/rejected": -475.80767822265625, "loss": 0.5748, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.574325442314148, "rewards/margins": 0.5580999255180359, "rewards/rejected": -2.132425308227539, "step": 5510 }, { "epoch": 0.72, "eval_logits/chosen": -2.2735891342163086, "eval_logits/rejected": -2.2813940048217773, "eval_logps/chosen": -494.952392578125, "eval_logps/rejected": -506.3679504394531, "eval_loss": 0.6027331948280334, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.6220301389694214, "eval_rewards/margins": 0.43054893612861633, "eval_rewards/rejected": -2.052579164505005, "eval_runtime": 197.0977, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.074, "step": 5510 }, { "epoch": 0.72, "learning_rate": 1.0848317357500854e-06, "logits/chosen": -2.406419277191162, "logits/rejected": -2.399305582046509, "logps/chosen": -533.1932983398438, "logps/rejected": -488.5306091308594, "loss": 0.6292, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.7428977489471436, "rewards/margins": 0.3303782641887665, "rewards/rejected": -2.0732760429382324, "step": 5520 }, { "epoch": 0.72, "eval_logits/chosen": -2.2739861011505127, "eval_logits/rejected": -2.2815887928009033, "eval_logps/chosen": -495.77471923828125, "eval_logps/rejected": -507.1916809082031, "eval_loss": 0.6024631261825562, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.6302530765533447, "eval_rewards/margins": 0.43056365847587585, "eval_rewards/rejected": -2.060816764831543, "eval_runtime": 197.3728, "eval_samples_per_second": 10.133, "eval_steps_per_second": 5.067, "step": 5520 }, { "epoch": 0.72, "learning_rate": 1.0754304534652404e-06, "logits/chosen": -2.475829601287842, "logits/rejected": -2.5434672832489014, "logps/chosen": -475.5762634277344, "logps/rejected": -535.7167358398438, "loss": 0.6393, "rewards/accuracies": 0.5625, "rewards/chosen": -1.6018517017364502, "rewards/margins": 0.27932238578796387, "rewards/rejected": -1.881174087524414, "step": 5530 }, { "epoch": 0.72, "eval_logits/chosen": -2.278412342071533, "eval_logits/rejected": -2.286480665206909, "eval_logps/chosen": -490.4624938964844, "eval_logps/rejected": -501.47857666015625, "eval_loss": 0.6020786166191101, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.577130913734436, "eval_rewards/margins": 0.4265541732311249, "eval_rewards/rejected": -2.0036849975585938, "eval_runtime": 197.0737, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 5530 }, { "epoch": 0.72, "learning_rate": 1.0660589091223854e-06, "logits/chosen": -2.4679157733917236, "logits/rejected": -2.430014133453369, "logps/chosen": -423.26025390625, "logps/rejected": -465.1514587402344, "loss": 0.5557, "rewards/accuracies": 0.6875, "rewards/chosen": -1.5359132289886475, "rewards/margins": 0.5502170920372009, "rewards/rejected": -2.086129903793335, "step": 5540 }, { "epoch": 0.72, "eval_logits/chosen": -2.2755799293518066, "eval_logits/rejected": -2.2840092182159424, "eval_logps/chosen": -489.9895935058594, "eval_logps/rejected": -501.1238098144531, "eval_loss": 0.6020728349685669, "eval_rewards/accuracies": 0.671500027179718, "eval_rewards/chosen": -1.572401762008667, "eval_rewards/margins": 0.4277363419532776, "eval_rewards/rejected": -2.000138282775879, "eval_runtime": 196.8328, "eval_samples_per_second": 10.161, "eval_steps_per_second": 5.08, "step": 5540 }, { "epoch": 0.73, "learning_rate": 1.0567172983528534e-06, "logits/chosen": -2.4794580936431885, "logits/rejected": -2.463869571685791, "logps/chosen": -414.53021240234375, "logps/rejected": -453.79705810546875, "loss": 0.5549, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4582284688949585, "rewards/margins": 0.5179659724235535, "rewards/rejected": -1.9761943817138672, "step": 5550 }, { "epoch": 0.73, "eval_logits/chosen": -2.271183967590332, "eval_logits/rejected": -2.2797226905822754, "eval_logps/chosen": -490.1856384277344, "eval_logps/rejected": -501.3836975097656, "eval_loss": 0.6021662950515747, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -1.574361801147461, "eval_rewards/margins": 0.4283748269081116, "eval_rewards/rejected": -2.002736806869507, "eval_runtime": 197.0997, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.074, "step": 5550 }, { "epoch": 0.73, "learning_rate": 1.0474058161631168e-06, "logits/chosen": -2.5028529167175293, "logits/rejected": -2.4594624042510986, "logps/chosen": -553.6297607421875, "logps/rejected": -557.9371337890625, "loss": 0.6443, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.6656099557876587, "rewards/margins": 0.32611754536628723, "rewards/rejected": -1.9917274713516235, "step": 5560 }, { "epoch": 0.73, "eval_logits/chosen": -2.270707845687866, "eval_logits/rejected": -2.279705047607422, "eval_logps/chosen": -488.2053527832031, "eval_logps/rejected": -499.2417907714844, "eval_loss": 0.6019599437713623, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.554559350013733, "eval_rewards/margins": 0.42675837874412537, "eval_rewards/rejected": -1.9813178777694702, "eval_runtime": 197.2341, "eval_samples_per_second": 10.14, "eval_steps_per_second": 5.07, "step": 5560 }, { "epoch": 0.73, "learning_rate": 1.0381246569307077e-06, "logits/chosen": -2.548515796661377, "logits/rejected": -2.5208840370178223, "logps/chosen": -537.696044921875, "logps/rejected": -528.3955688476562, "loss": 0.6073, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.6977163553237915, "rewards/margins": 0.3686388432979584, "rewards/rejected": -2.0663552284240723, "step": 5570 }, { "epoch": 0.73, "eval_logits/chosen": -2.273380756378174, "eval_logits/rejected": -2.282517194747925, "eval_logps/chosen": -486.97906494140625, "eval_logps/rejected": -497.864013671875, "eval_loss": 0.6022564768791199, "eval_rewards/accuracies": 0.6710000038146973, "eval_rewards/chosen": -1.5422965288162231, "eval_rewards/margins": 0.4252430200576782, "eval_rewards/rejected": -1.967539668083191, "eval_runtime": 197.1563, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 5570 }, { "epoch": 0.73, "learning_rate": 1.0288740144001722e-06, "logits/chosen": -2.544621229171753, "logits/rejected": -2.495824098587036, "logps/chosen": -473.4520568847656, "logps/rejected": -455.8770446777344, "loss": 0.6376, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4984050989151, "rewards/margins": 0.3813208043575287, "rewards/rejected": -1.8797260522842407, "step": 5580 }, { "epoch": 0.73, "eval_logits/chosen": -2.277961492538452, "eval_logits/rejected": -2.287504196166992, "eval_logps/chosen": -484.2668151855469, "eval_logps/rejected": -494.9880676269531, "eval_loss": 0.602099597454071, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.515174150466919, "eval_rewards/margins": 0.42360609769821167, "eval_rewards/rejected": -1.9387801885604858, "eval_runtime": 197.048, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 5580 }, { "epoch": 0.73, "learning_rate": 1.0196540816790127e-06, "logits/chosen": -2.4399209022521973, "logits/rejected": -2.40258526802063, "logps/chosen": -455.9623107910156, "logps/rejected": -428.95220947265625, "loss": 0.6265, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.552139163017273, "rewards/margins": 0.3237138092517853, "rewards/rejected": -1.8758528232574463, "step": 5590 }, { "epoch": 0.73, "eval_logits/chosen": -2.283484697341919, "eval_logits/rejected": -2.2933106422424316, "eval_logps/chosen": -481.48687744140625, "eval_logps/rejected": -491.9718017578125, "eval_loss": 0.6015101075172424, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.4873746633529663, "eval_rewards/margins": 0.421243280172348, "eval_rewards/rejected": -1.9086179733276367, "eval_runtime": 197.2622, "eval_samples_per_second": 10.139, "eval_steps_per_second": 5.069, "step": 5590 }, { "epoch": 0.73, "learning_rate": 1.0104650512336679e-06, "logits/chosen": -2.6136372089385986, "logits/rejected": -2.5906195640563965, "logps/chosen": -479.2579650878906, "logps/rejected": -477.80609130859375, "loss": 0.6676, "rewards/accuracies": 0.625, "rewards/chosen": -1.4934594631195068, "rewards/margins": 0.5157946944236755, "rewards/rejected": -2.009254217147827, "step": 5600 }, { "epoch": 0.73, "eval_logits/chosen": -2.2854361534118652, "eval_logits/rejected": -2.2954437732696533, "eval_logps/chosen": -479.8711242675781, "eval_logps/rejected": -490.2027282714844, "eval_loss": 0.6013363599777222, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.4712170362472534, "eval_rewards/margins": 0.4197098910808563, "eval_rewards/rejected": -1.8909268379211426, "eval_runtime": 197.0989, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.074, "step": 5600 }, { "epoch": 0.73, "learning_rate": 1.0013071148854861e-06, "logits/chosen": -2.4359683990478516, "logits/rejected": -2.471727132797241, "logps/chosen": -435.45135498046875, "logps/rejected": -510.74151611328125, "loss": 0.4983, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.4019895792007446, "rewards/margins": 0.7769641876220703, "rewards/rejected": -2.1789536476135254, "step": 5610 }, { "epoch": 0.73, "eval_logits/chosen": -2.2821550369262695, "eval_logits/rejected": -2.2920167446136475, "eval_logps/chosen": -481.1645202636719, "eval_logps/rejected": -491.68902587890625, "eval_loss": 0.6012270450592041, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4841513633728027, "eval_rewards/margins": 0.42163896560668945, "eval_rewards/rejected": -1.9057903289794922, "eval_runtime": 196.9735, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 5610 }, { "epoch": 0.74, "learning_rate": 9.921804638067292e-07, "logits/chosen": -2.549757719039917, "logits/rejected": -2.479682445526123, "logps/chosen": -484.84307861328125, "logps/rejected": -484.265380859375, "loss": 0.5565, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.5029096603393555, "rewards/margins": 0.5209773778915405, "rewards/rejected": -2.0238871574401855, "step": 5620 }, { "epoch": 0.74, "eval_logits/chosen": -2.2793734073638916, "eval_logits/rejected": -2.288806200027466, "eval_logps/chosen": -483.9366760253906, "eval_logps/rejected": -494.7959899902344, "eval_loss": 0.6013678908348083, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.5118728876113892, "eval_rewards/margins": 0.4249865412712097, "eval_rewards/rejected": -1.936859369277954, "eval_runtime": 196.9929, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.076, "step": 5620 }, { "epoch": 0.74, "learning_rate": 9.830852885165749e-07, "logits/chosen": -2.3858892917633057, "logits/rejected": -2.5052125453948975, "logps/chosen": -443.6277770996094, "logps/rejected": -522.7201538085938, "loss": 0.6331, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.6711645126342773, "rewards/margins": 0.36261284351348877, "rewards/rejected": -2.0337772369384766, "step": 5630 }, { "epoch": 0.74, "eval_logits/chosen": -2.274428606033325, "eval_logits/rejected": -2.283668041229248, "eval_logps/chosen": -487.4775695800781, "eval_logps/rejected": -498.7555847167969, "eval_loss": 0.6015153527259827, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.5472811460494995, "eval_rewards/margins": 0.4291747510433197, "eval_rewards/rejected": -1.9764559268951416, "eval_runtime": 196.9769, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 5630 }, { "epoch": 0.74, "learning_rate": 9.740217788771453e-07, "logits/chosen": -2.4526009559631348, "logits/rejected": -2.5034918785095215, "logps/chosen": -467.23858642578125, "logps/rejected": -472.4024963378906, "loss": 0.6273, "rewards/accuracies": 0.75, "rewards/chosen": -1.4195793867111206, "rewards/margins": 0.3300246298313141, "rewards/rejected": -1.7496038675308228, "step": 5640 }, { "epoch": 0.74, "eval_logits/chosen": -2.2742860317230225, "eval_logits/rejected": -2.2834599018096924, "eval_logps/chosen": -488.29083251953125, "eval_logps/rejected": -499.7831726074219, "eval_loss": 0.601729154586792, "eval_rewards/accuracies": 0.6754999756813049, "eval_rewards/chosen": -1.555414080619812, "eval_rewards/margins": 0.4313174784183502, "eval_rewards/rejected": -1.9867314100265503, "eval_runtime": 196.9888, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.076, "step": 5640 }, { "epoch": 0.74, "learning_rate": 9.649901240895374e-07, "logits/chosen": -2.4312241077423096, "logits/rejected": -2.428156852722168, "logps/chosen": -451.2998962402344, "logps/rejected": -492.22003173828125, "loss": 0.5639, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.4851653575897217, "rewards/margins": 0.5256800651550293, "rewards/rejected": -2.010845184326172, "step": 5650 }, { "epoch": 0.74, "eval_logits/chosen": -2.2733027935028076, "eval_logits/rejected": -2.2820732593536377, "eval_logps/chosen": -488.8881530761719, "eval_logps/rejected": -500.4703063964844, "eval_loss": 0.6017880439758301, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.561387538909912, "eval_rewards/margins": 0.43221515417099, "eval_rewards/rejected": -1.9936028718948364, "eval_runtime": 197.1915, "eval_samples_per_second": 10.142, "eval_steps_per_second": 5.071, "step": 5650 }, { "epoch": 0.74, "learning_rate": 9.559905126898803e-07, "logits/chosen": -2.5057854652404785, "logits/rejected": -2.45814847946167, "logps/chosen": -486.2225646972656, "logps/rejected": -483.7979431152344, "loss": 0.5558, "rewards/accuracies": 0.75, "rewards/chosen": -1.4760544300079346, "rewards/margins": 0.47292762994766235, "rewards/rejected": -1.9489818811416626, "step": 5660 }, { "epoch": 0.74, "eval_logits/chosen": -2.2709062099456787, "eval_logits/rejected": -2.2793853282928467, "eval_logps/chosen": -489.8941345214844, "eval_logps/rejected": -501.52117919921875, "eval_loss": 0.6018960475921631, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.5714472532272339, "eval_rewards/margins": 0.43266430497169495, "eval_rewards/rejected": -2.0041117668151855, "eval_runtime": 197.0072, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 5660 }, { "epoch": 0.74, "learning_rate": 9.470231325453958e-07, "logits/chosen": -2.486539125442505, "logits/rejected": -2.4017176628112793, "logps/chosen": -487.76812744140625, "logps/rejected": -487.40020751953125, "loss": 0.6319, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.659186601638794, "rewards/margins": 0.414996862411499, "rewards/rejected": -2.074183225631714, "step": 5670 }, { "epoch": 0.74, "eval_logits/chosen": -2.2720530033111572, "eval_logits/rejected": -2.280398368835449, "eval_logps/chosen": -489.9786376953125, "eval_logps/rejected": -501.5250549316406, "eval_loss": 0.6018633246421814, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.5722917318344116, "eval_rewards/margins": 0.4318588972091675, "eval_rewards/rejected": -2.004150629043579, "eval_runtime": 196.9852, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 5670 }, { "epoch": 0.74, "learning_rate": 9.380881708504741e-07, "logits/chosen": -2.430464029312134, "logits/rejected": -2.3562910556793213, "logps/chosen": -424.9378356933594, "logps/rejected": -422.5223083496094, "loss": 0.6027, "rewards/accuracies": 0.75, "rewards/chosen": -1.4295918941497803, "rewards/margins": 0.45209747552871704, "rewards/rejected": -1.881689429283142, "step": 5680 }, { "epoch": 0.74, "eval_logits/chosen": -2.2714383602142334, "eval_logits/rejected": -2.279670000076294, "eval_logps/chosen": -489.5482482910156, "eval_logps/rejected": -501.127685546875, "eval_loss": 0.6015214323997498, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -1.5679885149002075, "eval_rewards/margins": 0.4321881830692291, "eval_rewards/rejected": -2.0001769065856934, "eval_runtime": 197.0412, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 5680 }, { "epoch": 0.74, "learning_rate": 9.291858141227733e-07, "logits/chosen": -2.5464229583740234, "logits/rejected": -2.510371208190918, "logps/chosen": -473.4132385253906, "logps/rejected": -516.4024658203125, "loss": 0.6094, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.5813710689544678, "rewards/margins": 0.36329102516174316, "rewards/rejected": -1.94466233253479, "step": 5690 }, { "epoch": 0.74, "eval_logits/chosen": -2.272278308868408, "eval_logits/rejected": -2.2808241844177246, "eval_logps/chosen": -486.8964538574219, "eval_logps/rejected": -498.2379455566406, "eval_loss": 0.6014631390571594, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -1.5414706468582153, "eval_rewards/margins": 0.42980849742889404, "eval_rewards/rejected": -1.9712789058685303, "eval_runtime": 196.9031, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 5690 }, { "epoch": 0.75, "learning_rate": 9.203162481993175e-07, "logits/chosen": -2.574666976928711, "logits/rejected": -2.5605838298797607, "logps/chosen": -517.884521484375, "logps/rejected": -548.8802490234375, "loss": 0.5418, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.382240891456604, "rewards/margins": 0.5335317254066467, "rewards/rejected": -1.9157726764678955, "step": 5700 }, { "epoch": 0.75, "eval_logits/chosen": -2.270965337753296, "eval_logits/rejected": -2.279421091079712, "eval_logps/chosen": -486.2874450683594, "eval_logps/rejected": -497.8123474121094, "eval_loss": 0.6014032959938049, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -1.5353801250457764, "eval_rewards/margins": 0.43164312839508057, "eval_rewards/rejected": -1.9670231342315674, "eval_runtime": 196.9824, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 5700 }, { "epoch": 0.75, "learning_rate": 9.114796582326255e-07, "logits/chosen": -2.587486505508423, "logits/rejected": -2.52176570892334, "logps/chosen": -476.721923828125, "logps/rejected": -478.04241943359375, "loss": 0.5983, "rewards/accuracies": 0.6875, "rewards/chosen": -1.6361109018325806, "rewards/margins": 0.37555187940597534, "rewards/rejected": -2.0116629600524902, "step": 5710 }, { "epoch": 0.75, "eval_logits/chosen": -2.265850067138672, "eval_logits/rejected": -2.2744035720825195, "eval_logps/chosen": -486.9491271972656, "eval_logps/rejected": -498.6160888671875, "eval_loss": 0.601709246635437, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.5419965982437134, "eval_rewards/margins": 0.4330638349056244, "eval_rewards/rejected": -1.9750605821609497, "eval_runtime": 196.9651, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 5710 }, { "epoch": 0.75, "learning_rate": 9.026762286868373e-07, "logits/chosen": -2.5438895225524902, "logits/rejected": -2.5906364917755127, "logps/chosen": -475.0172424316406, "logps/rejected": -548.627685546875, "loss": 0.514, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4461355209350586, "rewards/margins": 0.6971672773361206, "rewards/rejected": -2.1433026790618896, "step": 5720 }, { "epoch": 0.75, "eval_logits/chosen": -2.2665517330169678, "eval_logits/rejected": -2.27467942237854, "eval_logps/chosen": -486.794677734375, "eval_logps/rejected": -498.497802734375, "eval_loss": 0.6015446782112122, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.5404525995254517, "eval_rewards/margins": 0.43342551589012146, "eval_rewards/rejected": -1.973878026008606, "eval_runtime": 197.0977, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.074, "step": 5720 }, { "epoch": 0.75, "learning_rate": 8.939061433338722e-07, "logits/chosen": -2.5130527019500732, "logits/rejected": -2.499204635620117, "logps/chosen": -486.7627868652344, "logps/rejected": -509.2347106933594, "loss": 0.619, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4873241186141968, "rewards/margins": 0.36170998215675354, "rewards/rejected": -1.849034070968628, "step": 5730 }, { "epoch": 0.75, "eval_logits/chosen": -2.2673990726470947, "eval_logits/rejected": -2.2757279872894287, "eval_logps/chosen": -486.59014892578125, "eval_logps/rejected": -498.31207275390625, "eval_loss": 0.601487934589386, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.5384074449539185, "eval_rewards/margins": 0.4336126148700714, "eval_rewards/rejected": -1.9720200300216675, "eval_runtime": 197.3813, "eval_samples_per_second": 10.133, "eval_steps_per_second": 5.066, "step": 5730 }, { "epoch": 0.75, "learning_rate": 8.851695852495867e-07, "logits/chosen": -2.487215280532837, "logits/rejected": -2.556673526763916, "logps/chosen": -415.80621337890625, "logps/rejected": -482.0787048339844, "loss": 0.5531, "rewards/accuracies": 0.75, "rewards/chosen": -1.3482439517974854, "rewards/margins": 0.6087583899497986, "rewards/rejected": -1.9570024013519287, "step": 5740 }, { "epoch": 0.75, "eval_logits/chosen": -2.2642552852630615, "eval_logits/rejected": -2.2726001739501953, "eval_logps/chosen": -488.0776672363281, "eval_logps/rejected": -500.07562255859375, "eval_loss": 0.6019229292869568, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.5532824993133545, "eval_rewards/margins": 0.43637382984161377, "eval_rewards/rejected": -1.9896563291549683, "eval_runtime": 197.2187, "eval_samples_per_second": 10.141, "eval_steps_per_second": 5.071, "step": 5740 }, { "epoch": 0.75, "learning_rate": 8.764667368099525e-07, "logits/chosen": -2.383542060852051, "logits/rejected": -2.3600852489471436, "logps/chosen": -447.48663330078125, "logps/rejected": -461.00311279296875, "loss": 0.5954, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.495228886604309, "rewards/margins": 0.45612436532974243, "rewards/rejected": -1.9513533115386963, "step": 5750 }, { "epoch": 0.75, "eval_logits/chosen": -2.2611377239227295, "eval_logits/rejected": -2.2693042755126953, "eval_logps/chosen": -491.0955810546875, "eval_logps/rejected": -503.47662353515625, "eval_loss": 0.6021108627319336, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.5834616422653198, "eval_rewards/margins": 0.44020453095436096, "eval_rewards/rejected": -2.0236663818359375, "eval_runtime": 197.0868, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 5750 }, { "epoch": 0.75, "learning_rate": 8.677977796872541e-07, "logits/chosen": -2.4153354167938232, "logits/rejected": -2.3980398178100586, "logps/chosen": -519.9898071289062, "logps/rejected": -478.7305603027344, "loss": 0.5817, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.646702527999878, "rewards/margins": 0.4848001003265381, "rewards/rejected": -2.131502628326416, "step": 5760 }, { "epoch": 0.75, "eval_logits/chosen": -2.2558789253234863, "eval_logits/rejected": -2.263498544692993, "eval_logps/chosen": -495.2908020019531, "eval_logps/rejected": -507.93896484375, "eval_loss": 0.6024330854415894, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -1.6254137754440308, "eval_rewards/margins": 0.4428756833076477, "eval_rewards/rejected": -2.068289279937744, "eval_runtime": 197.0828, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 5760 }, { "epoch": 0.76, "learning_rate": 8.591628948462913e-07, "logits/chosen": -2.3832852840423584, "logits/rejected": -2.3352439403533936, "logps/chosen": -496.6756896972656, "logps/rejected": -539.018798828125, "loss": 0.5812, "rewards/accuracies": 0.625, "rewards/chosen": -1.5903397798538208, "rewards/margins": 0.47482776641845703, "rewards/rejected": -2.0651674270629883, "step": 5770 }, { "epoch": 0.76, "eval_logits/chosen": -2.252265691757202, "eval_logits/rejected": -2.259838104248047, "eval_logps/chosen": -496.9005432128906, "eval_logps/rejected": -509.745361328125, "eval_loss": 0.6024233102798462, "eval_rewards/accuracies": 0.6765000224113464, "eval_rewards/chosen": -1.6415109634399414, "eval_rewards/margins": 0.4448423981666565, "eval_rewards/rejected": -2.086353302001953, "eval_runtime": 197.1673, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 5770 }, { "epoch": 0.76, "learning_rate": 8.505622625406054e-07, "logits/chosen": -2.427070140838623, "logits/rejected": -2.4216442108154297, "logps/chosen": -469.9287109375, "logps/rejected": -521.0614013671875, "loss": 0.5655, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.5248607397079468, "rewards/margins": 0.5117624402046204, "rewards/rejected": -2.036623239517212, "step": 5780 }, { "epoch": 0.76, "eval_logits/chosen": -2.2464487552642822, "eval_logits/rejected": -2.253951072692871, "eval_logps/chosen": -497.7292785644531, "eval_logps/rejected": -510.7714538574219, "eval_loss": 0.6029162406921387, "eval_rewards/accuracies": 0.6765000224113464, "eval_rewards/chosen": -1.64979887008667, "eval_rewards/margins": 0.4468156099319458, "eval_rewards/rejected": -2.0966145992279053, "eval_runtime": 197.0263, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 5780 }, { "epoch": 0.76, "learning_rate": 8.419960623087129e-07, "logits/chosen": -2.320359468460083, "logits/rejected": -2.3157875537872314, "logps/chosen": -408.5916442871094, "logps/rejected": -476.99945068359375, "loss": 0.6, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.4397971630096436, "rewards/margins": 0.4516604542732239, "rewards/rejected": -1.8914577960968018, "step": 5790 }, { "epoch": 0.76, "eval_logits/chosen": -2.2434499263763428, "eval_logits/rejected": -2.251415252685547, "eval_logps/chosen": -495.6883850097656, "eval_logps/rejected": -508.6549377441406, "eval_loss": 0.6027740240097046, "eval_rewards/accuracies": 0.671500027179718, "eval_rewards/chosen": -1.6293898820877075, "eval_rewards/margins": 0.44605928659439087, "eval_rewards/rejected": -2.075449228286743, "eval_runtime": 197.5339, "eval_samples_per_second": 10.125, "eval_steps_per_second": 5.062, "step": 5790 }, { "epoch": 0.76, "learning_rate": 8.334644729703617e-07, "logits/chosen": -2.474212408065796, "logits/rejected": -2.4731945991516113, "logps/chosen": -463.1309509277344, "logps/rejected": -494.95343017578125, "loss": 0.6685, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.7183113098144531, "rewards/margins": 0.34951871633529663, "rewards/rejected": -2.0678298473358154, "step": 5800 }, { "epoch": 0.76, "eval_logits/chosen": -2.242737054824829, "eval_logits/rejected": -2.251211404800415, "eval_logps/chosen": -494.2292785644531, "eval_logps/rejected": -507.03466796875, "eval_loss": 0.602836549282074, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.6147984266281128, "eval_rewards/margins": 0.44444799423217773, "eval_rewards/rejected": -2.05924654006958, "eval_runtime": 196.8054, "eval_samples_per_second": 10.162, "eval_steps_per_second": 5.081, "step": 5800 }, { "epoch": 0.76, "learning_rate": 8.249676726227931e-07, "logits/chosen": -2.3594369888305664, "logits/rejected": -2.4024455547332764, "logps/chosen": -534.9568481445312, "logps/rejected": -516.611328125, "loss": 0.6623, "rewards/accuracies": 0.625, "rewards/chosen": -1.6957733631134033, "rewards/margins": 0.27873340249061584, "rewards/rejected": -1.9745069742202759, "step": 5810 }, { "epoch": 0.76, "eval_logits/chosen": -2.245490550994873, "eval_logits/rejected": -2.2543113231658936, "eval_logps/chosen": -490.97747802734375, "eval_logps/rejected": -503.5045166015625, "eval_loss": 0.6023638844490051, "eval_rewards/accuracies": 0.671500027179718, "eval_rewards/chosen": -1.5822806358337402, "eval_rewards/margins": 0.44166430830955505, "eval_rewards/rejected": -2.023944854736328, "eval_runtime": 197.2133, "eval_samples_per_second": 10.141, "eval_steps_per_second": 5.071, "step": 5810 }, { "epoch": 0.76, "learning_rate": 8.165058386370314e-07, "logits/chosen": -2.4096219539642334, "logits/rejected": -2.40710711479187, "logps/chosen": -485.88201904296875, "logps/rejected": -538.20654296875, "loss": 0.6156, "rewards/accuracies": 0.6875, "rewards/chosen": -1.5708699226379395, "rewards/margins": 0.40414518117904663, "rewards/rejected": -1.9750150442123413, "step": 5820 }, { "epoch": 0.76, "eval_logits/chosen": -2.2488386631011963, "eval_logits/rejected": -2.2581334114074707, "eval_logps/chosen": -488.6615295410156, "eval_logps/rejected": -500.8721618652344, "eval_loss": 0.6023542881011963, "eval_rewards/accuracies": 0.671500027179718, "eval_rewards/chosen": -1.5591212511062622, "eval_rewards/margins": 0.4384998679161072, "eval_rewards/rejected": -1.997620940208435, "eval_runtime": 197.0619, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 5820 }, { "epoch": 0.76, "learning_rate": 8.080791476541721e-07, "logits/chosen": -2.366792678833008, "logits/rejected": -2.3858425617218018, "logps/chosen": -435.0596618652344, "logps/rejected": -487.25054931640625, "loss": 0.5679, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.5387595891952515, "rewards/margins": 0.6316569447517395, "rewards/rejected": -2.1704165935516357, "step": 5830 }, { "epoch": 0.76, "eval_logits/chosen": -2.2466695308685303, "eval_logits/rejected": -2.2560107707977295, "eval_logps/chosen": -489.2959289550781, "eval_logps/rejected": -501.5491943359375, "eval_loss": 0.6022310256958008, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -1.5654653310775757, "eval_rewards/margins": 0.4389267563819885, "eval_rewards/rejected": -2.00439190864563, "eval_runtime": 197.0548, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 5830 }, { "epoch": 0.76, "learning_rate": 7.996877755817026e-07, "logits/chosen": -2.478151321411133, "logits/rejected": -2.4247565269470215, "logps/chosen": -471.8946228027344, "logps/rejected": -460.6763610839844, "loss": 0.6497, "rewards/accuracies": 0.625, "rewards/chosen": -1.5455646514892578, "rewards/margins": 0.3121718466281891, "rewards/rejected": -1.857736587524414, "step": 5840 }, { "epoch": 0.76, "eval_logits/chosen": -2.2453322410583496, "eval_logits/rejected": -2.2547030448913574, "eval_logps/chosen": -488.5802307128906, "eval_logps/rejected": -500.7490539550781, "eval_loss": 0.6024636030197144, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.5583082437515259, "eval_rewards/margins": 0.4380822479724884, "eval_rewards/rejected": -1.9963903427124023, "eval_runtime": 197.1694, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 5840 }, { "epoch": 0.77, "learning_rate": 7.913318975898238e-07, "logits/chosen": -2.5146617889404297, "logits/rejected": -2.4494576454162598, "logps/chosen": -574.7240600585938, "logps/rejected": -543.0043334960938, "loss": 0.6452, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.6685956716537476, "rewards/margins": 0.39929109811782837, "rewards/rejected": -2.0678868293762207, "step": 5850 }, { "epoch": 0.77, "eval_logits/chosen": -2.2524771690368652, "eval_logits/rejected": -2.2622721195220947, "eval_logps/chosen": -485.30487060546875, "eval_logps/rejected": -497.1414489746094, "eval_loss": 0.6020148992538452, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -1.525554895401001, "eval_rewards/margins": 0.43475958704948425, "eval_rewards/rejected": -1.960314154624939, "eval_runtime": 197.0139, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 5850 }, { "epoch": 0.77, "learning_rate": 7.830116881077992e-07, "logits/chosen": -2.4145424365997314, "logits/rejected": -2.4374794960021973, "logps/chosen": -492.340576171875, "logps/rejected": -513.1353759765625, "loss": 0.546, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4143502712249756, "rewards/margins": 0.5963946580886841, "rewards/rejected": -2.010745048522949, "step": 5860 }, { "epoch": 0.77, "eval_logits/chosen": -2.2555458545684814, "eval_logits/rejected": -2.265408754348755, "eval_logps/chosen": -485.0486145019531, "eval_logps/rejected": -496.7984313964844, "eval_loss": 0.601836085319519, "eval_rewards/accuracies": 0.6700000166893005, "eval_rewards/chosen": -1.5229917764663696, "eval_rewards/margins": 0.43389254808425903, "eval_rewards/rejected": -1.9568843841552734, "eval_runtime": 197.3008, "eval_samples_per_second": 10.137, "eval_steps_per_second": 5.068, "step": 5860 }, { "epoch": 0.77, "learning_rate": 7.747273208203096e-07, "logits/chosen": -2.4561104774475098, "logits/rejected": -2.4396491050720215, "logps/chosen": -484.59979248046875, "logps/rejected": -533.8568115234375, "loss": 0.6034, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.5615015029907227, "rewards/margins": 0.4400373101234436, "rewards/rejected": -2.0015387535095215, "step": 5870 }, { "epoch": 0.77, "eval_logits/chosen": -2.259807586669922, "eval_logits/rejected": -2.2697737216949463, "eval_logps/chosen": -483.45758056640625, "eval_logps/rejected": -494.9549560546875, "eval_loss": 0.601536750793457, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.5070816278457642, "eval_rewards/margins": 0.4313679337501526, "eval_rewards/rejected": -1.938449501991272, "eval_runtime": 196.9847, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 5870 }, { "epoch": 0.77, "learning_rate": 7.664789686638272e-07, "logits/chosen": -2.4302382469177246, "logits/rejected": -2.3414528369903564, "logps/chosen": -445.996337890625, "logps/rejected": -505.2832946777344, "loss": 0.5887, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.4469554424285889, "rewards/margins": 0.5224305987358093, "rewards/rejected": -1.969386339187622, "step": 5880 }, { "epoch": 0.77, "eval_logits/chosen": -2.263823986053467, "eval_logits/rejected": -2.273604154586792, "eval_logps/chosen": -482.2774963378906, "eval_logps/rejected": -493.6579284667969, "eval_loss": 0.6012995839118958, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4952807426452637, "eval_rewards/margins": 0.4301982820034027, "eval_rewards/rejected": -1.9254790544509888, "eval_runtime": 196.9121, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.078, "step": 5880 }, { "epoch": 0.77, "learning_rate": 7.582668038230089e-07, "logits/chosen": -2.566232204437256, "logits/rejected": -2.5483384132385254, "logps/chosen": -482.5179138183594, "logps/rejected": -509.20068359375, "loss": 0.5731, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.3963481187820435, "rewards/margins": 0.5344547033309937, "rewards/rejected": -1.9308027029037476, "step": 5890 }, { "epoch": 0.77, "eval_logits/chosen": -2.2662353515625, "eval_logits/rejected": -2.2763512134552, "eval_logps/chosen": -481.1260681152344, "eval_logps/rejected": -492.50885009765625, "eval_loss": 0.6008906364440918, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4837665557861328, "eval_rewards/margins": 0.4302213191986084, "eval_rewards/rejected": -1.9139878749847412, "eval_runtime": 196.9832, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 5890 }, { "epoch": 0.77, "learning_rate": 7.500909977271007e-07, "logits/chosen": -2.534989356994629, "logits/rejected": -2.5355916023254395, "logps/chosen": -502.8077087402344, "logps/rejected": -513.6491088867188, "loss": 0.6046, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.5153987407684326, "rewards/margins": 0.42296546697616577, "rewards/rejected": -1.938364028930664, "step": 5900 }, { "epoch": 0.77, "eval_logits/chosen": -2.2680561542510986, "eval_logits/rejected": -2.2782294750213623, "eval_logps/chosen": -479.7952880859375, "eval_logps/rejected": -491.11492919921875, "eval_loss": 0.6009992957115173, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -1.470458745956421, "eval_rewards/margins": 0.4295899569988251, "eval_rewards/rejected": -1.900048851966858, "eval_runtime": 197.14, "eval_samples_per_second": 10.145, "eval_steps_per_second": 5.073, "step": 5900 }, { "epoch": 0.77, "learning_rate": 7.41951721046357e-07, "logits/chosen": -2.4341280460357666, "logits/rejected": -2.367621421813965, "logps/chosen": -463.6956481933594, "logps/rejected": -497.50518798828125, "loss": 0.5727, "rewards/accuracies": 0.625, "rewards/chosen": -1.3780359029769897, "rewards/margins": 0.5068241357803345, "rewards/rejected": -1.8848600387573242, "step": 5910 }, { "epoch": 0.77, "eval_logits/chosen": -2.267744541168213, "eval_logits/rejected": -2.2779266834259033, "eval_logps/chosen": -478.6908264160156, "eval_logps/rejected": -489.8962097167969, "eval_loss": 0.6008686423301697, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4594143629074097, "eval_rewards/margins": 0.4284478425979614, "eval_rewards/rejected": -1.887862205505371, "eval_runtime": 196.874, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.079, "step": 5910 }, { "epoch": 0.77, "learning_rate": 7.338491436884787e-07, "logits/chosen": -2.3899145126342773, "logits/rejected": -2.415982723236084, "logps/chosen": -430.9964904785156, "logps/rejected": -475.78314208984375, "loss": 0.5793, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4056332111358643, "rewards/margins": 0.44162511825561523, "rewards/rejected": -1.8472583293914795, "step": 5920 }, { "epoch": 0.77, "eval_logits/chosen": -2.2659740447998047, "eval_logits/rejected": -2.2761447429656982, "eval_logps/chosen": -477.880615234375, "eval_logps/rejected": -489.0225524902344, "eval_loss": 0.6009781360626221, "eval_rewards/accuracies": 0.6759999990463257, "eval_rewards/chosen": -1.4513121843338013, "eval_rewards/margins": 0.4278135299682617, "eval_rewards/rejected": -1.879125714302063, "eval_runtime": 196.8377, "eval_samples_per_second": 10.161, "eval_steps_per_second": 5.08, "step": 5920 }, { "epoch": 0.78, "learning_rate": 7.257834347950693e-07, "logits/chosen": -2.445920467376709, "logits/rejected": -2.4083142280578613, "logps/chosen": -465.4082946777344, "logps/rejected": -448.84210205078125, "loss": 0.6688, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.5156171321868896, "rewards/margins": 0.25803884863853455, "rewards/rejected": -1.7736561298370361, "step": 5930 }, { "epoch": 0.78, "eval_logits/chosen": -2.2672274112701416, "eval_logits/rejected": -2.2775511741638184, "eval_logps/chosen": -476.42205810546875, "eval_logps/rejected": -487.39031982421875, "eval_loss": 0.6011342406272888, "eval_rewards/accuracies": 0.6759999990463257, "eval_rewards/chosen": -1.4367263317108154, "eval_rewards/margins": 0.4260764718055725, "eval_rewards/rejected": -1.862802505493164, "eval_runtime": 197.2252, "eval_samples_per_second": 10.141, "eval_steps_per_second": 5.07, "step": 5930 }, { "epoch": 0.78, "learning_rate": 7.177547627380987e-07, "logits/chosen": -2.4808781147003174, "logits/rejected": -2.4829397201538086, "logps/chosen": -504.11962890625, "logps/rejected": -516.2730712890625, "loss": 0.5613, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3955371379852295, "rewards/margins": 0.46548405289649963, "rewards/rejected": -1.8610212802886963, "step": 5940 }, { "epoch": 0.78, "eval_logits/chosen": -2.265183687210083, "eval_logits/rejected": -2.2755210399627686, "eval_logps/chosen": -476.0000305175781, "eval_logps/rejected": -486.95806884765625, "eval_loss": 0.6013757586479187, "eval_rewards/accuracies": 0.6759999990463257, "eval_rewards/chosen": -1.4325059652328491, "eval_rewards/margins": 0.42597436904907227, "eval_rewards/rejected": -1.858480453491211, "eval_runtime": 197.0249, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.076, "step": 5940 }, { "epoch": 0.78, "learning_rate": 7.097632951163949e-07, "logits/chosen": -2.447105884552002, "logits/rejected": -2.4564273357391357, "logps/chosen": -489.5555114746094, "logps/rejected": -485.93609619140625, "loss": 0.6437, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.3995712995529175, "rewards/margins": 0.33234044909477234, "rewards/rejected": -1.7319118976593018, "step": 5950 }, { "epoch": 0.78, "eval_logits/chosen": -2.2648227214813232, "eval_logits/rejected": -2.2753043174743652, "eval_logps/chosen": -475.759033203125, "eval_logps/rejected": -486.78265380859375, "eval_loss": 0.601370632648468, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.4300963878631592, "eval_rewards/margins": 0.4266298711299896, "eval_rewards/rejected": -1.8567264080047607, "eval_runtime": 197.0184, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.076, "step": 5950 }, { "epoch": 0.78, "learning_rate": 7.018091987521386e-07, "logits/chosen": -2.5762312412261963, "logits/rejected": -2.48101806640625, "logps/chosen": -496.75152587890625, "logps/rejected": -503.8118591308594, "loss": 0.6239, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.5386106967926025, "rewards/margins": 0.41985002160072327, "rewards/rejected": -1.9584605693817139, "step": 5960 }, { "epoch": 0.78, "eval_logits/chosen": -2.265352249145508, "eval_logits/rejected": -2.275949239730835, "eval_logps/chosen": -475.8328857421875, "eval_logps/rejected": -486.9198303222656, "eval_loss": 0.6012548804283142, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.4308347702026367, "eval_rewards/margins": 0.4272632896900177, "eval_rewards/rejected": -1.8580981492996216, "eval_runtime": 197.3352, "eval_samples_per_second": 10.135, "eval_steps_per_second": 5.068, "step": 5960 }, { "epoch": 0.78, "learning_rate": 6.93892639687386e-07, "logits/chosen": -2.5643134117126465, "logits/rejected": -2.5059189796447754, "logps/chosen": -499.9007263183594, "logps/rejected": -481.8160095214844, "loss": 0.5621, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.3037292957305908, "rewards/margins": 0.500011146068573, "rewards/rejected": -1.8037407398223877, "step": 5970 }, { "epoch": 0.78, "eval_logits/chosen": -2.2671244144439697, "eval_logits/rejected": -2.277761697769165, "eval_logps/chosen": -475.1019287109375, "eval_logps/rejected": -486.0708312988281, "eval_loss": 0.6014404892921448, "eval_rewards/accuracies": 0.6759999990463257, "eval_rewards/chosen": -1.4235249757766724, "eval_rewards/margins": 0.4260830581188202, "eval_rewards/rejected": -1.8496081829071045, "eval_runtime": 196.9419, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.078, "step": 5970 }, { "epoch": 0.78, "learning_rate": 6.860137831806018e-07, "logits/chosen": -2.452705144882202, "logits/rejected": -2.4688546657562256, "logps/chosen": -502.90594482421875, "logps/rejected": -490.27099609375, "loss": 0.6296, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4148881435394287, "rewards/margins": 0.36012446880340576, "rewards/rejected": -1.7750126123428345, "step": 5980 }, { "epoch": 0.78, "eval_logits/chosen": -2.265007734298706, "eval_logits/rejected": -2.275844097137451, "eval_logps/chosen": -475.1183776855469, "eval_logps/rejected": -486.05059814453125, "eval_loss": 0.6015436053276062, "eval_rewards/accuracies": 0.6765000224113464, "eval_rewards/chosen": -1.4236900806427002, "eval_rewards/margins": 0.4257160723209381, "eval_rewards/rejected": -1.849406123161316, "eval_runtime": 196.9075, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 5980 }, { "epoch": 0.78, "learning_rate": 6.781727937032054e-07, "logits/chosen": -2.403275966644287, "logits/rejected": -2.36027193069458, "logps/chosen": -439.105712890625, "logps/rejected": -502.03125, "loss": 0.4725, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.2160950899124146, "rewards/margins": 0.7384004592895508, "rewards/rejected": -1.9544956684112549, "step": 5990 }, { "epoch": 0.78, "eval_logits/chosen": -2.2625324726104736, "eval_logits/rejected": -2.27329158782959, "eval_logps/chosen": -476.120361328125, "eval_logps/rejected": -487.20330810546875, "eval_loss": 0.6016895174980164, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.433709979057312, "eval_rewards/margins": 0.4272230565547943, "eval_rewards/rejected": -1.8609328269958496, "eval_runtime": 197.0728, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.074, "step": 5990 }, { "epoch": 0.79, "learning_rate": 6.703698349361437e-07, "logits/chosen": -2.4751968383789062, "logits/rejected": -2.4393486976623535, "logps/chosen": -460.97589111328125, "logps/rejected": -453.7529296875, "loss": 0.6035, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.4815524816513062, "rewards/margins": 0.4474663734436035, "rewards/rejected": -1.9290189743041992, "step": 6000 }, { "epoch": 0.79, "eval_logits/chosen": -2.2604939937591553, "eval_logits/rejected": -2.2711093425750732, "eval_logps/chosen": -476.5792236328125, "eval_logps/rejected": -487.7991943359375, "eval_loss": 0.6018210649490356, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.4382983446121216, "eval_rewards/margins": 0.4285930097103119, "eval_rewards/rejected": -1.8668912649154663, "eval_runtime": 197.257, "eval_samples_per_second": 10.139, "eval_steps_per_second": 5.07, "step": 6000 }, { "epoch": 0.79, "learning_rate": 6.626050697664682e-07, "logits/chosen": -2.4417779445648193, "logits/rejected": -2.4099671840667725, "logps/chosen": -476.68585205078125, "logps/rejected": -481.16741943359375, "loss": 0.5114, "rewards/accuracies": 0.8125, "rewards/chosen": -1.3816766738891602, "rewards/margins": 0.5975061058998108, "rewards/rejected": -1.9791828393936157, "step": 6010 }, { "epoch": 0.79, "eval_logits/chosen": -2.2580788135528564, "eval_logits/rejected": -2.2684972286224365, "eval_logps/chosen": -476.69720458984375, "eval_logps/rejected": -487.9945983886719, "eval_loss": 0.6021937727928162, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4394779205322266, "eval_rewards/margins": 0.42936745285987854, "eval_rewards/rejected": -1.8688453435897827, "eval_runtime": 197.1161, "eval_samples_per_second": 10.146, "eval_steps_per_second": 5.073, "step": 6010 }, { "epoch": 0.79, "learning_rate": 6.548786602839404e-07, "logits/chosen": -2.4599475860595703, "logits/rejected": -2.4786622524261475, "logps/chosen": -427.3006896972656, "logps/rejected": -455.1160583496094, "loss": 0.5019, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.32621169090271, "rewards/margins": 0.6678387522697449, "rewards/rejected": -1.9940506219863892, "step": 6020 }, { "epoch": 0.79, "eval_logits/chosen": -2.2531657218933105, "eval_logits/rejected": -2.2634389400482178, "eval_logps/chosen": -478.9855041503906, "eval_logps/rejected": -490.62701416015625, "eval_loss": 0.6022667288780212, "eval_rewards/accuracies": 0.6759999990463257, "eval_rewards/chosen": -1.4623608589172363, "eval_rewards/margins": 0.43280887603759766, "eval_rewards/rejected": -1.8951694965362549, "eval_runtime": 197.0009, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 6020 }, { "epoch": 0.79, "learning_rate": 6.471907677776426e-07, "logits/chosen": -2.572305202484131, "logits/rejected": -2.5147862434387207, "logps/chosen": -504.14837646484375, "logps/rejected": -492.93988037109375, "loss": 0.6167, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4243654012680054, "rewards/margins": 0.43302780389785767, "rewards/rejected": -1.8573930263519287, "step": 6030 }, { "epoch": 0.79, "eval_logits/chosen": -2.2490084171295166, "eval_logits/rejected": -2.2592198848724365, "eval_logps/chosen": -479.2915344238281, "eval_logps/rejected": -490.9763488769531, "eval_loss": 0.602845311164856, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -1.4654208421707153, "eval_rewards/margins": 0.4332420825958252, "eval_rewards/rejected": -1.89866304397583, "eval_runtime": 197.2697, "eval_samples_per_second": 10.138, "eval_steps_per_second": 5.069, "step": 6030 }, { "epoch": 0.79, "learning_rate": 6.39541552732617e-07, "logits/chosen": -2.483621120452881, "logits/rejected": -2.469176769256592, "logps/chosen": -477.4981994628906, "logps/rejected": -550.258056640625, "loss": 0.6122, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.5244404077529907, "rewards/margins": 0.39870789647102356, "rewards/rejected": -1.9231481552124023, "step": 6040 }, { "epoch": 0.79, "eval_logits/chosen": -2.245048999786377, "eval_logits/rejected": -2.2553117275238037, "eval_logps/chosen": -479.2562561035156, "eval_logps/rejected": -490.96612548828125, "eval_loss": 0.6031754016876221, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.4650685787200928, "eval_rewards/margins": 0.4334927797317505, "eval_rewards/rejected": -1.8985613584518433, "eval_runtime": 197.0832, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 6040 }, { "epoch": 0.79, "learning_rate": 6.319311748265086e-07, "logits/chosen": -2.396491050720215, "logits/rejected": -2.3860714435577393, "logps/chosen": -578.4803466796875, "logps/rejected": -558.1339721679688, "loss": 0.5706, "rewards/accuracies": 0.6875, "rewards/chosen": -1.421531319618225, "rewards/margins": 0.5684695243835449, "rewards/rejected": -1.9900007247924805, "step": 6050 }, { "epoch": 0.79, "eval_logits/chosen": -2.244931221008301, "eval_logits/rejected": -2.2554006576538086, "eval_logps/chosen": -477.5636291503906, "eval_logps/rejected": -489.1151428222656, "eval_loss": 0.6030679941177368, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -1.4481416940689087, "eval_rewards/margins": 0.4319096505641937, "eval_rewards/rejected": -1.8800513744354248, "eval_runtime": 196.8509, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 6050 }, { "epoch": 0.79, "learning_rate": 6.243597929262404e-07, "logits/chosen": -2.4419026374816895, "logits/rejected": -2.36991810798645, "logps/chosen": -425.7701721191406, "logps/rejected": -522.5084228515625, "loss": 0.5889, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.6083042621612549, "rewards/margins": 0.5457934737205505, "rewards/rejected": -2.15409779548645, "step": 6060 }, { "epoch": 0.79, "eval_logits/chosen": -2.243594169616699, "eval_logits/rejected": -2.2539806365966797, "eval_logps/chosen": -478.1839294433594, "eval_logps/rejected": -489.84661865234375, "eval_loss": 0.6033233404159546, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.4543453454971313, "eval_rewards/margins": 0.4330209493637085, "eval_rewards/rejected": -1.8873660564422607, "eval_runtime": 197.1008, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.074, "step": 6060 }, { "epoch": 0.79, "learning_rate": 6.168275650846875e-07, "logits/chosen": -2.5039191246032715, "logits/rejected": -2.503308057785034, "logps/chosen": -501.80194091796875, "logps/rejected": -490.83343505859375, "loss": 0.5764, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.3347852230072021, "rewards/margins": 0.53114253282547, "rewards/rejected": -1.8659274578094482, "step": 6070 }, { "epoch": 0.79, "eval_logits/chosen": -2.242035388946533, "eval_logits/rejected": -2.252312421798706, "eval_logps/chosen": -478.4716491699219, "eval_logps/rejected": -490.1448059082031, "eval_loss": 0.6033748984336853, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4572224617004395, "eval_rewards/margins": 0.4331255555152893, "eval_rewards/rejected": -1.890347957611084, "eval_runtime": 196.9973, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 6070 }, { "epoch": 0.8, "learning_rate": 6.093346485373863e-07, "logits/chosen": -2.3956141471862793, "logits/rejected": -2.3247618675231934, "logps/chosen": -507.55584716796875, "logps/rejected": -502.39202880859375, "loss": 0.5793, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5132863521575928, "rewards/margins": 0.4537445902824402, "rewards/rejected": -1.9670308828353882, "step": 6080 }, { "epoch": 0.8, "eval_logits/chosen": -2.237912178039551, "eval_logits/rejected": -2.248093605041504, "eval_logps/chosen": -479.7290344238281, "eval_logps/rejected": -491.5454406738281, "eval_loss": 0.6036680936813354, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4697966575622559, "eval_rewards/margins": 0.4345575273036957, "eval_rewards/rejected": -1.9043540954589844, "eval_runtime": 197.1075, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.073, "step": 6080 }, { "epoch": 0.8, "learning_rate": 6.018811996992455e-07, "logits/chosen": -2.3724429607391357, "logits/rejected": -2.404536724090576, "logps/chosen": -489.76641845703125, "logps/rejected": -499.05224609375, "loss": 0.4869, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.3946596384048462, "rewards/margins": 0.7288642525672913, "rewards/rejected": -2.123523712158203, "step": 6090 }, { "epoch": 0.8, "eval_logits/chosen": -2.2322680950164795, "eval_logits/rejected": -2.242032527923584, "eval_logps/chosen": -481.9536437988281, "eval_logps/rejected": -494.1253967285156, "eval_loss": 0.6040297150611877, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.4920426607131958, "eval_rewards/margins": 0.43811145424842834, "eval_rewards/rejected": -1.9301540851593018, "eval_runtime": 197.2026, "eval_samples_per_second": 10.142, "eval_steps_per_second": 5.071, "step": 6090 }, { "epoch": 0.8, "learning_rate": 5.944673741612866e-07, "logits/chosen": -2.391608953475952, "logits/rejected": -2.3916258811950684, "logps/chosen": -503.87261962890625, "logps/rejected": -543.1837768554688, "loss": 0.6102, "rewards/accuracies": 0.6875, "rewards/chosen": -1.6538951396942139, "rewards/margins": 0.37116914987564087, "rewards/rejected": -2.02506422996521, "step": 6100 }, { "epoch": 0.8, "eval_logits/chosen": -2.2282028198242188, "eval_logits/rejected": -2.2379844188690186, "eval_logps/chosen": -483.1098327636719, "eval_logps/rejected": -495.4471740722656, "eval_loss": 0.6045427322387695, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.5036044120788574, "eval_rewards/margins": 0.439767062664032, "eval_rewards/rejected": -1.943371295928955, "eval_runtime": 197.072, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.074, "step": 6100 }, { "epoch": 0.8, "learning_rate": 5.870933266873916e-07, "logits/chosen": -2.463844060897827, "logits/rejected": -2.460224151611328, "logps/chosen": -424.7655334472656, "logps/rejected": -478.42974853515625, "loss": 0.6125, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.4444048404693604, "rewards/margins": 0.4888002872467041, "rewards/rejected": -1.9332048892974854, "step": 6110 }, { "epoch": 0.8, "eval_logits/chosen": -2.2313151359558105, "eval_logits/rejected": -2.241241693496704, "eval_logps/chosen": -481.9096984863281, "eval_logps/rejected": -494.27734375, "eval_loss": 0.6041462421417236, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.4916030168533325, "eval_rewards/margins": 0.4400705397129059, "eval_rewards/rejected": -1.9316734075546265, "eval_runtime": 196.949, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 6110 }, { "epoch": 0.8, "learning_rate": 5.797592112110734e-07, "logits/chosen": -2.377103567123413, "logits/rejected": -2.3861546516418457, "logps/chosen": -396.3837890625, "logps/rejected": -416.21844482421875, "loss": 0.6175, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.3936705589294434, "rewards/margins": 0.45263057947158813, "rewards/rejected": -1.8463008403778076, "step": 6120 }, { "epoch": 0.8, "eval_logits/chosen": -2.2341790199279785, "eval_logits/rejected": -2.244394063949585, "eval_logps/chosen": -479.7630920410156, "eval_logps/rejected": -491.9109802246094, "eval_loss": 0.6041192412376404, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.470137119293213, "eval_rewards/margins": 0.4378722608089447, "eval_rewards/rejected": -1.9080092906951904, "eval_runtime": 196.8201, "eval_samples_per_second": 10.162, "eval_steps_per_second": 5.081, "step": 6120 }, { "epoch": 0.8, "learning_rate": 5.724651808322645e-07, "logits/chosen": -2.410794973373413, "logits/rejected": -2.4280776977539062, "logps/chosen": -440.4469299316406, "logps/rejected": -522.4815673828125, "loss": 0.5436, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3273398876190186, "rewards/margins": 0.6034899950027466, "rewards/rejected": -1.9308300018310547, "step": 6130 }, { "epoch": 0.8, "eval_logits/chosen": -2.2336935997009277, "eval_logits/rejected": -2.2439229488372803, "eval_logps/chosen": -479.22161865234375, "eval_logps/rejected": -491.26458740234375, "eval_loss": 0.6043089032173157, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4647226333618164, "eval_rewards/margins": 0.4368227422237396, "eval_rewards/rejected": -1.9015452861785889, "eval_runtime": 196.811, "eval_samples_per_second": 10.162, "eval_steps_per_second": 5.081, "step": 6130 }, { "epoch": 0.8, "learning_rate": 5.652113878141194e-07, "logits/chosen": -2.323244333267212, "logits/rejected": -2.281261444091797, "logps/chosen": -386.3601989746094, "logps/rejected": -416.959716796875, "loss": 0.6058, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.3550357818603516, "rewards/margins": 0.3657050132751465, "rewards/rejected": -1.7207406759262085, "step": 6140 }, { "epoch": 0.8, "eval_logits/chosen": -2.230740785598755, "eval_logits/rejected": -2.240811586380005, "eval_logps/chosen": -479.8324890136719, "eval_logps/rejected": -492.0013122558594, "eval_loss": 0.6044318079948425, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.4708307981491089, "eval_rewards/margins": 0.43808186054229736, "eval_rewards/rejected": -1.9089127779006958, "eval_runtime": 196.8299, "eval_samples_per_second": 10.161, "eval_steps_per_second": 5.081, "step": 6140 }, { "epoch": 0.8, "learning_rate": 5.579979835798361e-07, "logits/chosen": -2.4510176181793213, "logits/rejected": -2.375293731689453, "logps/chosen": -444.62164306640625, "logps/rejected": -498.118408203125, "loss": 0.5545, "rewards/accuracies": 0.75, "rewards/chosen": -1.3945045471191406, "rewards/margins": 0.5962954759597778, "rewards/rejected": -1.990799903869629, "step": 6150 }, { "epoch": 0.8, "eval_logits/chosen": -2.228811740875244, "eval_logits/rejected": -2.23856782913208, "eval_logps/chosen": -480.6499328613281, "eval_logps/rejected": -492.998291015625, "eval_loss": 0.604430079460144, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.479004979133606, "eval_rewards/margins": 0.43987739086151123, "eval_rewards/rejected": -1.9188824892044067, "eval_runtime": 196.9241, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 6150 }, { "epoch": 0.81, "learning_rate": 5.508251187094932e-07, "logits/chosen": -2.475147008895874, "logits/rejected": -2.426905393600464, "logps/chosen": -513.8081665039062, "logps/rejected": -485.3460998535156, "loss": 0.6665, "rewards/accuracies": 0.625, "rewards/chosen": -1.5422532558441162, "rewards/margins": 0.36423978209495544, "rewards/rejected": -1.9064929485321045, "step": 6160 }, { "epoch": 0.81, "eval_logits/chosen": -2.230452299118042, "eval_logits/rejected": -2.2402803897857666, "eval_logps/chosen": -479.9612121582031, "eval_logps/rejected": -492.21575927734375, "eval_loss": 0.6041795611381531, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.4721179008483887, "eval_rewards/margins": 0.4389396905899048, "eval_rewards/rejected": -1.9110575914382935, "eval_runtime": 196.9866, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.076, "step": 6160 }, { "epoch": 0.81, "learning_rate": 5.436929429369122e-07, "logits/chosen": -2.437342882156372, "logits/rejected": -2.391582727432251, "logps/chosen": -441.62054443359375, "logps/rejected": -460.2799377441406, "loss": 0.6219, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4500336647033691, "rewards/margins": 0.38196703791618347, "rewards/rejected": -1.832000732421875, "step": 6170 }, { "epoch": 0.81, "eval_logits/chosen": -2.2336361408233643, "eval_logits/rejected": -2.2436013221740723, "eval_logps/chosen": -478.6950378417969, "eval_logps/rejected": -490.7254943847656, "eval_loss": 0.6038507223129272, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -1.459456205368042, "eval_rewards/margins": 0.43669870495796204, "eval_rewards/rejected": -1.8961549997329712, "eval_runtime": 197.1892, "eval_samples_per_second": 10.143, "eval_steps_per_second": 5.071, "step": 6170 }, { "epoch": 0.81, "learning_rate": 5.366016051465245e-07, "logits/chosen": -2.488328695297241, "logits/rejected": -2.4017763137817383, "logps/chosen": -459.72076416015625, "logps/rejected": -505.5301818847656, "loss": 0.543, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4137346744537354, "rewards/margins": 0.6287944912910461, "rewards/rejected": -2.042529344558716, "step": 6180 }, { "epoch": 0.81, "eval_logits/chosen": -2.2341439723968506, "eval_logits/rejected": -2.2442734241485596, "eval_logps/chosen": -478.4734191894531, "eval_logps/rejected": -490.54278564453125, "eval_loss": 0.6037075519561768, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.4572402238845825, "eval_rewards/margins": 0.43708717823028564, "eval_rewards/rejected": -1.8943274021148682, "eval_runtime": 197.0345, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 6180 }, { "epoch": 0.81, "learning_rate": 5.295512533702701e-07, "logits/chosen": -2.417457103729248, "logits/rejected": -2.3992388248443604, "logps/chosen": -430.3199768066406, "logps/rejected": -464.72943115234375, "loss": 0.6159, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4803340435028076, "rewards/margins": 0.4091721177101135, "rewards/rejected": -1.8895061016082764, "step": 6190 }, { "epoch": 0.81, "eval_logits/chosen": -2.234912395477295, "eval_logits/rejected": -2.245119571685791, "eval_logps/chosen": -477.7791748046875, "eval_logps/rejected": -489.7291259765625, "eval_loss": 0.6039474010467529, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4502978324890137, "eval_rewards/margins": 0.4358930289745331, "eval_rewards/rejected": -1.8861908912658691, "eval_runtime": 196.9988, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 6190 }, { "epoch": 0.81, "learning_rate": 5.225420347845023e-07, "logits/chosen": -2.437502384185791, "logits/rejected": -2.470454692840576, "logps/chosen": -497.87188720703125, "logps/rejected": -513.72802734375, "loss": 0.6169, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4202971458435059, "rewards/margins": 0.4486163258552551, "rewards/rejected": -1.8689134120941162, "step": 6200 }, { "epoch": 0.81, "eval_logits/chosen": -2.236572265625, "eval_logits/rejected": -2.2469632625579834, "eval_logps/chosen": -476.685791015625, "eval_logps/rejected": -488.4581604003906, "eval_loss": 0.6037640571594238, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -1.4393635988235474, "eval_rewards/margins": 0.4341173768043518, "eval_rewards/rejected": -1.8734811544418335, "eval_runtime": 197.1174, "eval_samples_per_second": 10.146, "eval_steps_per_second": 5.073, "step": 6200 }, { "epoch": 0.81, "learning_rate": 5.155740957069186e-07, "logits/chosen": -2.591386318206787, "logits/rejected": -2.545407772064209, "logps/chosen": -494.3350524902344, "logps/rejected": -491.04559326171875, "loss": 0.6039, "rewards/accuracies": 0.625, "rewards/chosen": -1.5001957416534424, "rewards/margins": 0.4530462324619293, "rewards/rejected": -1.9532420635223389, "step": 6210 }, { "epoch": 0.81, "eval_logits/chosen": -2.2379512786865234, "eval_logits/rejected": -2.2482731342315674, "eval_logps/chosen": -476.0990295410156, "eval_logps/rejected": -487.8376770019531, "eval_loss": 0.6033933162689209, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.4334958791732788, "eval_rewards/margins": 0.4337805509567261, "eval_rewards/rejected": -1.8672764301300049, "eval_runtime": 196.993, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.076, "step": 6210 }, { "epoch": 0.81, "learning_rate": 5.08647581593506e-07, "logits/chosen": -2.3962807655334473, "logits/rejected": -2.3699183464050293, "logps/chosen": -445.7699279785156, "logps/rejected": -480.8201599121094, "loss": 0.5264, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.2488043308258057, "rewards/margins": 0.5704221129417419, "rewards/rejected": -1.8192262649536133, "step": 6220 }, { "epoch": 0.81, "eval_logits/chosen": -2.2378056049346924, "eval_logits/rejected": -2.248084545135498, "eval_logps/chosen": -476.7023010253906, "eval_logps/rejected": -488.5807800292969, "eval_loss": 0.603471577167511, "eval_rewards/accuracies": 0.6710000038146973, "eval_rewards/chosen": -1.4395289421081543, "eval_rewards/margins": 0.4351785182952881, "eval_rewards/rejected": -1.8747072219848633, "eval_runtime": 197.2627, "eval_samples_per_second": 10.139, "eval_steps_per_second": 5.069, "step": 6220 }, { "epoch": 0.82, "learning_rate": 5.017626370355014e-07, "logits/chosen": -2.4885125160217285, "logits/rejected": -2.3865249156951904, "logps/chosen": -464.68365478515625, "logps/rejected": -473.9043884277344, "loss": 0.5109, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3580504655838013, "rewards/margins": 0.6627442240715027, "rewards/rejected": -2.020794630050659, "step": 6230 }, { "epoch": 0.82, "eval_logits/chosen": -2.2380645275115967, "eval_logits/rejected": -2.248126745223999, "eval_logps/chosen": -478.20233154296875, "eval_logps/rejected": -490.4144592285156, "eval_loss": 0.6032126545906067, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4545294046401978, "eval_rewards/margins": 0.4385150074958801, "eval_rewards/rejected": -1.8930445909500122, "eval_runtime": 197.1328, "eval_samples_per_second": 10.145, "eval_steps_per_second": 5.073, "step": 6230 }, { "epoch": 0.82, "learning_rate": 4.949194057563783e-07, "logits/chosen": -2.488008737564087, "logits/rejected": -2.4647369384765625, "logps/chosen": -487.2359313964844, "logps/rejected": -460.8746643066406, "loss": 0.643, "rewards/accuracies": 0.6875, "rewards/chosen": -1.485291838645935, "rewards/margins": 0.36292168498039246, "rewards/rejected": -1.84821355342865, "step": 6240 }, { "epoch": 0.82, "eval_logits/chosen": -2.2388057708740234, "eval_logits/rejected": -2.248603343963623, "eval_logps/chosen": -478.5943298339844, "eval_logps/rejected": -490.90087890625, "eval_loss": 0.6029048562049866, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4584497213363647, "eval_rewards/margins": 0.4394589364528656, "eval_rewards/rejected": -1.8979085683822632, "eval_runtime": 197.1294, "eval_samples_per_second": 10.146, "eval_steps_per_second": 5.073, "step": 6240 }, { "epoch": 0.82, "learning_rate": 4.881180306088418e-07, "logits/chosen": -2.4483964443206787, "logits/rejected": -2.4352147579193115, "logps/chosen": -464.33380126953125, "logps/rejected": -473.46636962890625, "loss": 0.5155, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.2545944452285767, "rewards/margins": 0.7057239413261414, "rewards/rejected": -1.9603185653686523, "step": 6250 }, { "epoch": 0.82, "eval_logits/chosen": -2.2375144958496094, "eval_logits/rejected": -2.24702787399292, "eval_logps/chosen": -480.0223083496094, "eval_logps/rejected": -492.4518127441406, "eval_loss": 0.6030805706977844, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4727287292480469, "eval_rewards/margins": 0.4406891465187073, "eval_rewards/rejected": -1.913417935371399, "eval_runtime": 196.9294, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 6250 }, { "epoch": 0.82, "learning_rate": 4.813586535718512e-07, "logits/chosen": -2.433474540710449, "logits/rejected": -2.376683473587036, "logps/chosen": -516.4065551757812, "logps/rejected": -486.9189453125, "loss": 0.5705, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.4895950555801392, "rewards/margins": 0.5969935655593872, "rewards/rejected": -2.0865883827209473, "step": 6260 }, { "epoch": 0.82, "eval_logits/chosen": -2.2347970008850098, "eval_logits/rejected": -2.24385666847229, "eval_logps/chosen": -481.6423645019531, "eval_logps/rejected": -494.3458557128906, "eval_loss": 0.6028019785881042, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -1.4889296293258667, "eval_rewards/margins": 0.44342872500419617, "eval_rewards/rejected": -1.9323583841323853, "eval_runtime": 196.7038, "eval_samples_per_second": 10.168, "eval_steps_per_second": 5.084, "step": 6260 }, { "epoch": 0.82, "learning_rate": 4.746414157476506e-07, "logits/chosen": -2.570890188217163, "logits/rejected": -2.5058765411376953, "logps/chosen": -441.581787109375, "logps/rejected": -438.6575622558594, "loss": 0.5888, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4001985788345337, "rewards/margins": 0.44795188307762146, "rewards/rejected": -1.8481504917144775, "step": 6270 }, { "epoch": 0.82, "eval_logits/chosen": -2.2354795932769775, "eval_logits/rejected": -2.244324207305908, "eval_logps/chosen": -481.95770263671875, "eval_logps/rejected": -494.7315673828125, "eval_loss": 0.6022074222564697, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4920825958251953, "eval_rewards/margins": 0.4441326856613159, "eval_rewards/rejected": -1.9362152814865112, "eval_runtime": 196.9316, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 6270 }, { "epoch": 0.82, "learning_rate": 4.679664573588294e-07, "logits/chosen": -2.394583225250244, "logits/rejected": -2.3104095458984375, "logps/chosen": -439.1087951660156, "logps/rejected": -446.10858154296875, "loss": 0.6121, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.452602744102478, "rewards/margins": 0.41477838158607483, "rewards/rejected": -1.8673810958862305, "step": 6280 }, { "epoch": 0.82, "eval_logits/chosen": -2.237715482711792, "eval_logits/rejected": -2.2464842796325684, "eval_logps/chosen": -480.85943603515625, "eval_logps/rejected": -493.50860595703125, "eval_loss": 0.6019992828369141, "eval_rewards/accuracies": 0.671500027179718, "eval_rewards/chosen": -1.4811004400253296, "eval_rewards/margins": 0.44288545846939087, "eval_rewards/rejected": -1.9239858388900757, "eval_runtime": 196.8947, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 6280 }, { "epoch": 0.82, "learning_rate": 4.6133391774538903e-07, "logits/chosen": -2.5484490394592285, "logits/rejected": -2.521597146987915, "logps/chosen": -504.94378662109375, "logps/rejected": -511.28369140625, "loss": 0.5833, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4214107990264893, "rewards/margins": 0.6198548078536987, "rewards/rejected": -2.0412654876708984, "step": 6290 }, { "epoch": 0.82, "eval_logits/chosen": -2.2396035194396973, "eval_logits/rejected": -2.2481977939605713, "eval_logps/chosen": -479.93096923828125, "eval_logps/rejected": -492.5154724121094, "eval_loss": 0.6017520427703857, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -1.4718154668807983, "eval_rewards/margins": 0.44223955273628235, "eval_rewards/rejected": -1.9140551090240479, "eval_runtime": 196.9733, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 6290 }, { "epoch": 0.82, "learning_rate": 4.5474393536184214e-07, "logits/chosen": -2.4809508323669434, "logits/rejected": -2.4672088623046875, "logps/chosen": -469.42193603515625, "logps/rejected": -466.1731872558594, "loss": 0.5904, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4090373516082764, "rewards/margins": 0.42221125960350037, "rewards/rejected": -1.8312486410140991, "step": 6300 }, { "epoch": 0.82, "eval_logits/chosen": -2.2400152683258057, "eval_logits/rejected": -2.2484843730926514, "eval_logps/chosen": -480.49237060546875, "eval_logps/rejected": -493.15142822265625, "eval_loss": 0.601513147354126, "eval_rewards/accuracies": 0.6765000224113464, "eval_rewards/chosen": -1.4774298667907715, "eval_rewards/margins": 0.4429841935634613, "eval_rewards/rejected": -1.9204140901565552, "eval_runtime": 196.9799, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 6300 }, { "epoch": 0.83, "learning_rate": 4.4819664777431243e-07, "logits/chosen": -2.3989458084106445, "logits/rejected": -2.413045883178711, "logps/chosen": -430.36669921875, "logps/rejected": -429.37030029296875, "loss": 0.6735, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.5417275428771973, "rewards/margins": 0.24210628867149353, "rewards/rejected": -1.7838338613510132, "step": 6310 }, { "epoch": 0.83, "eval_logits/chosen": -2.2396247386932373, "eval_logits/rejected": -2.2478537559509277, "eval_logps/chosen": -480.34619140625, "eval_logps/rejected": -492.9498291015625, "eval_loss": 0.6015501022338867, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4759677648544312, "eval_rewards/margins": 0.442430704832077, "eval_rewards/rejected": -1.918398380279541, "eval_runtime": 197.2452, "eval_samples_per_second": 10.14, "eval_steps_per_second": 5.07, "step": 6310 }, { "epoch": 0.83, "learning_rate": 4.416921916576722e-07, "logits/chosen": -2.370271921157837, "logits/rejected": -2.306959629058838, "logps/chosen": -523.9715576171875, "logps/rejected": -542.676025390625, "loss": 0.6187, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.5519903898239136, "rewards/margins": 0.39413270354270935, "rewards/rejected": -1.9461231231689453, "step": 6320 }, { "epoch": 0.83, "eval_logits/chosen": -2.239846706390381, "eval_logits/rejected": -2.2484591007232666, "eval_logps/chosen": -479.931884765625, "eval_logps/rejected": -492.4515075683594, "eval_loss": 0.60145103931427, "eval_rewards/accuracies": 0.6759999990463257, "eval_rewards/chosen": -1.4718244075775146, "eval_rewards/margins": 0.44159045815467834, "eval_rewards/rejected": -1.9134151935577393, "eval_runtime": 196.8848, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 6320 }, { "epoch": 0.83, "learning_rate": 4.352307027926828e-07, "logits/chosen": -2.4330732822418213, "logits/rejected": -2.4312150478363037, "logps/chosen": -477.30023193359375, "logps/rejected": -500.92828369140625, "loss": 0.5178, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4120477437973022, "rewards/margins": 0.6749431490898132, "rewards/rejected": -2.08699107170105, "step": 6330 }, { "epoch": 0.83, "eval_logits/chosen": -2.2387287616729736, "eval_logits/rejected": -2.24711537361145, "eval_logps/chosen": -480.0934143066406, "eval_logps/rejected": -492.5596618652344, "eval_loss": 0.6014659404754639, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -1.4734398126602173, "eval_rewards/margins": 0.44105657935142517, "eval_rewards/rejected": -1.9144963026046753, "eval_runtime": 197.0557, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 6330 }, { "epoch": 0.83, "learning_rate": 4.288123160631624e-07, "logits/chosen": -2.299553394317627, "logits/rejected": -2.3260738849639893, "logps/chosen": -446.32916259765625, "logps/rejected": -465.8793029785156, "loss": 0.6297, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.429883599281311, "rewards/margins": 0.3902866244316101, "rewards/rejected": -1.8201701641082764, "step": 6340 }, { "epoch": 0.83, "eval_logits/chosen": -2.2386250495910645, "eval_logits/rejected": -2.246933698654175, "eval_logps/chosen": -479.9998474121094, "eval_logps/rejected": -492.525634765625, "eval_loss": 0.6013615727424622, "eval_rewards/accuracies": 0.6769999861717224, "eval_rewards/chosen": -1.4725043773651123, "eval_rewards/margins": 0.44165146350860596, "eval_rewards/rejected": -1.9141559600830078, "eval_runtime": 197.1029, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.073, "step": 6340 }, { "epoch": 0.83, "learning_rate": 4.224371654531731e-07, "logits/chosen": -2.4214088916778564, "logits/rejected": -2.4219307899475098, "logps/chosen": -453.132080078125, "logps/rejected": -448.65966796875, "loss": 0.6506, "rewards/accuracies": 0.625, "rewards/chosen": -1.5275781154632568, "rewards/margins": 0.310029536485672, "rewards/rejected": -1.8376076221466064, "step": 6350 }, { "epoch": 0.83, "eval_logits/chosen": -2.2398183345794678, "eval_logits/rejected": -2.2483553886413574, "eval_logps/chosen": -478.97662353515625, "eval_logps/rejected": -491.418701171875, "eval_loss": 0.6013292074203491, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -1.4622721672058105, "eval_rewards/margins": 0.4408148229122162, "eval_rewards/rejected": -1.9030870199203491, "eval_runtime": 196.9669, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 6350 }, { "epoch": 0.83, "learning_rate": 4.1610538404421837e-07, "logits/chosen": -2.3899099826812744, "logits/rejected": -2.4600508213043213, "logps/chosen": -445.9964904785156, "logps/rejected": -517.3856811523438, "loss": 0.5739, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.4132336378097534, "rewards/margins": 0.5214926600456238, "rewards/rejected": -1.9347261190414429, "step": 6360 }, { "epoch": 0.83, "eval_logits/chosen": -2.2410073280334473, "eval_logits/rejected": -2.2497498989105225, "eval_logps/chosen": -478.55841064453125, "eval_logps/rejected": -490.9558410644531, "eval_loss": 0.6013907790184021, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -1.45809006690979, "eval_rewards/margins": 0.44036784768104553, "eval_rewards/rejected": -1.8984578847885132, "eval_runtime": 197.0733, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.074, "step": 6360 }, { "epoch": 0.83, "learning_rate": 4.098171040124699e-07, "logits/chosen": -2.4912033081054688, "logits/rejected": -2.441131114959717, "logps/chosen": -545.6764526367188, "logps/rejected": -493.4827575683594, "loss": 0.6565, "rewards/accuracies": 0.625, "rewards/chosen": -1.530342698097229, "rewards/margins": 0.35169893503189087, "rewards/rejected": -1.8820416927337646, "step": 6370 }, { "epoch": 0.83, "eval_logits/chosen": -2.241713285446167, "eval_logits/rejected": -2.250527858734131, "eval_logps/chosen": -478.0467834472656, "eval_logps/rejected": -490.4145812988281, "eval_loss": 0.6011056303977966, "eval_rewards/accuracies": 0.6765000224113464, "eval_rewards/chosen": -1.45297372341156, "eval_rewards/margins": 0.4400714933872223, "eval_rewards/rejected": -1.89304518699646, "eval_runtime": 196.8293, "eval_samples_per_second": 10.161, "eval_steps_per_second": 5.081, "step": 6370 }, { "epoch": 0.83, "learning_rate": 4.03572456626006e-07, "logits/chosen": -2.4287922382354736, "logits/rejected": -2.431551694869995, "logps/chosen": -479.798583984375, "logps/rejected": -491.73321533203125, "loss": 0.6382, "rewards/accuracies": 0.625, "rewards/chosen": -1.4386435747146606, "rewards/margins": 0.33013081550598145, "rewards/rejected": -1.7687742710113525, "step": 6380 }, { "epoch": 0.83, "eval_logits/chosen": -2.2448699474334717, "eval_logits/rejected": -2.2537577152252197, "eval_logps/chosen": -476.6830139160156, "eval_logps/rejected": -488.75482177734375, "eval_loss": 0.6012539863586426, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.4393357038497925, "eval_rewards/margins": 0.43711209297180176, "eval_rewards/rejected": -1.8764480352401733, "eval_runtime": 196.7731, "eval_samples_per_second": 10.164, "eval_steps_per_second": 5.082, "step": 6380 }, { "epoch": 0.84, "learning_rate": 3.9737157224207265e-07, "logits/chosen": -2.4541070461273193, "logits/rejected": -2.4512617588043213, "logps/chosen": -434.3753356933594, "logps/rejected": -461.8311462402344, "loss": 0.6057, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3837672472000122, "rewards/margins": 0.3907342553138733, "rewards/rejected": -1.7745015621185303, "step": 6390 }, { "epoch": 0.84, "eval_logits/chosen": -2.2441306114196777, "eval_logits/rejected": -2.253051280975342, "eval_logps/chosen": -476.2626037597656, "eval_logps/rejected": -488.24658203125, "eval_loss": 0.6012148857116699, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -1.4351314306259155, "eval_rewards/margins": 0.4362344443798065, "eval_rewards/rejected": -1.8713661432266235, "eval_runtime": 196.8681, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.08, "step": 6390 }, { "epoch": 0.84, "learning_rate": 3.912145803043596e-07, "logits/chosen": -2.4305484294891357, "logits/rejected": -2.4521608352661133, "logps/chosen": -497.2237243652344, "logps/rejected": -483.7049255371094, "loss": 0.6532, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.5712474584579468, "rewards/margins": 0.24624311923980713, "rewards/rejected": -1.817490816116333, "step": 6400 }, { "epoch": 0.84, "eval_logits/chosen": -2.2431321144104004, "eval_logits/rejected": -2.251950263977051, "eval_logps/chosen": -476.27880859375, "eval_logps/rejected": -488.2311706542969, "eval_loss": 0.6009360551834106, "eval_rewards/accuracies": 0.6754999756813049, "eval_rewards/chosen": -1.4352940320968628, "eval_rewards/margins": 0.4359172582626343, "eval_rewards/rejected": -1.8712114095687866, "eval_runtime": 197.2084, "eval_samples_per_second": 10.142, "eval_steps_per_second": 5.071, "step": 6400 }, { "epoch": 0.84, "learning_rate": 3.851016093403023e-07, "logits/chosen": -2.3944671154022217, "logits/rejected": -2.3814704418182373, "logps/chosen": -421.3961486816406, "logps/rejected": -465.965087890625, "loss": 0.5729, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4689327478408813, "rewards/margins": 0.5277568101882935, "rewards/rejected": -1.9966895580291748, "step": 6410 }, { "epoch": 0.84, "eval_logits/chosen": -2.2426493167877197, "eval_logits/rejected": -2.251471519470215, "eval_logps/chosen": -476.30645751953125, "eval_logps/rejected": -488.2204895019531, "eval_loss": 0.6010193824768066, "eval_rewards/accuracies": 0.6754999756813049, "eval_rewards/chosen": -1.4355709552764893, "eval_rewards/margins": 0.4355340600013733, "eval_rewards/rejected": -1.8711049556732178, "eval_runtime": 196.8977, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 6410 }, { "epoch": 0.84, "learning_rate": 3.7903278695839456e-07, "logits/chosen": -2.40942120552063, "logits/rejected": -2.4272334575653076, "logps/chosen": -461.953369140625, "logps/rejected": -469.8707580566406, "loss": 0.6151, "rewards/accuracies": 0.625, "rewards/chosen": -1.4102437496185303, "rewards/margins": 0.3656821846961975, "rewards/rejected": -1.775925874710083, "step": 6420 }, { "epoch": 0.84, "eval_logits/chosen": -2.2420711517333984, "eval_logits/rejected": -2.2507448196411133, "eval_logps/chosen": -476.4427490234375, "eval_logps/rejected": -488.3708801269531, "eval_loss": 0.6009459495544434, "eval_rewards/accuracies": 0.6759999990463257, "eval_rewards/chosen": -1.436933159828186, "eval_rewards/margins": 0.43567579984664917, "eval_rewards/rejected": -1.8726087808609009, "eval_runtime": 197.1777, "eval_samples_per_second": 10.143, "eval_steps_per_second": 5.072, "step": 6420 }, { "epoch": 0.84, "learning_rate": 3.7300823984552983e-07, "logits/chosen": -2.473325252532959, "logits/rejected": -2.47148060798645, "logps/chosen": -417.900146484375, "logps/rejected": -481.50640869140625, "loss": 0.5601, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3513211011886597, "rewards/margins": 0.47909989953041077, "rewards/rejected": -1.8304208517074585, "step": 6430 }, { "epoch": 0.84, "eval_logits/chosen": -2.240222930908203, "eval_logits/rejected": -2.2487614154815674, "eval_logps/chosen": -477.5064392089844, "eval_logps/rejected": -489.5664367675781, "eval_loss": 0.600739598274231, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -1.4475706815719604, "eval_rewards/margins": 0.4369937479496002, "eval_rewards/rejected": -1.8845641613006592, "eval_runtime": 197.1547, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 6430 }, { "epoch": 0.84, "learning_rate": 3.670280937643503e-07, "logits/chosen": -2.3927805423736572, "logits/rejected": -2.369868278503418, "logps/chosen": -466.90692138671875, "logps/rejected": -466.72296142578125, "loss": 0.6153, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4468450546264648, "rewards/margins": 0.46017885208129883, "rewards/rejected": -1.9070237874984741, "step": 6440 }, { "epoch": 0.84, "eval_logits/chosen": -2.2400424480438232, "eval_logits/rejected": -2.248690605163574, "eval_logps/chosen": -478.0989074707031, "eval_logps/rejected": -490.25750732421875, "eval_loss": 0.6008526682853699, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.453494668006897, "eval_rewards/margins": 0.4379802644252777, "eval_rewards/rejected": -1.8914748430252075, "eval_runtime": 196.9596, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 6440 }, { "epoch": 0.84, "learning_rate": 3.610924735506274e-07, "logits/chosen": -2.4371469020843506, "logits/rejected": -2.372954845428467, "logps/chosen": -517.828125, "logps/rejected": -467.57928466796875, "loss": 0.6261, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.4241763353347778, "rewards/margins": 0.3400752544403076, "rewards/rejected": -1.764251470565796, "step": 6450 }, { "epoch": 0.84, "eval_logits/chosen": -2.241787910461426, "eval_logits/rejected": -2.2503960132598877, "eval_logps/chosen": -477.5443115234375, "eval_logps/rejected": -489.5791015625, "eval_loss": 0.6006221771240234, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -1.447948694229126, "eval_rewards/margins": 0.4367419481277466, "eval_rewards/rejected": -1.8846906423568726, "eval_runtime": 196.8552, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 6450 }, { "epoch": 0.85, "learning_rate": 3.5520150311065316e-07, "logits/chosen": -2.4051318168640137, "logits/rejected": -2.383820056915283, "logps/chosen": -488.4266662597656, "logps/rejected": -504.89776611328125, "loss": 0.5422, "rewards/accuracies": 0.75, "rewards/chosen": -1.3867498636245728, "rewards/margins": 0.5661884546279907, "rewards/rejected": -1.9529380798339844, "step": 6460 }, { "epoch": 0.85, "eval_logits/chosen": -2.2409005165100098, "eval_logits/rejected": -2.2494056224823, "eval_logps/chosen": -478.5350646972656, "eval_logps/rejected": -490.6539611816406, "eval_loss": 0.600521981716156, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4578566551208496, "eval_rewards/margins": 0.4375828802585602, "eval_rewards/rejected": -1.895439624786377, "eval_runtime": 196.7864, "eval_samples_per_second": 10.163, "eval_steps_per_second": 5.082, "step": 6460 }, { "epoch": 0.85, "learning_rate": 3.493553054186527e-07, "logits/chosen": -2.449218273162842, "logits/rejected": -2.4553260803222656, "logps/chosen": -477.9852600097656, "logps/rejected": -502.3858337402344, "loss": 0.6368, "rewards/accuracies": 0.625, "rewards/chosen": -1.5308899879455566, "rewards/margins": 0.34937649965286255, "rewards/rejected": -1.880266785621643, "step": 6470 }, { "epoch": 0.85, "eval_logits/chosen": -2.2385032176971436, "eval_logits/rejected": -2.2466988563537598, "eval_logps/chosen": -479.49285888671875, "eval_logps/rejected": -491.6705627441406, "eval_loss": 0.6005980372428894, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.4674347639083862, "eval_rewards/margins": 0.4381706118583679, "eval_rewards/rejected": -1.9056053161621094, "eval_runtime": 197.1181, "eval_samples_per_second": 10.146, "eval_steps_per_second": 5.073, "step": 6470 }, { "epoch": 0.85, "learning_rate": 3.4355400251421977e-07, "logits/chosen": -2.3681087493896484, "logits/rejected": -2.379730701446533, "logps/chosen": -453.9375915527344, "logps/rejected": -469.03369140625, "loss": 0.6252, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4286261796951294, "rewards/margins": 0.4488712251186371, "rewards/rejected": -1.8774973154067993, "step": 6480 }, { "epoch": 0.85, "eval_logits/chosen": -2.2367665767669678, "eval_logits/rejected": -2.2451555728912354, "eval_logps/chosen": -480.6306457519531, "eval_logps/rejected": -492.9149169921875, "eval_loss": 0.6005855798721313, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.4788126945495605, "eval_rewards/margins": 0.43923622369766235, "eval_rewards/rejected": -1.9180489778518677, "eval_runtime": 197.0243, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.076, "step": 6480 }, { "epoch": 0.85, "learning_rate": 3.3779771549976637e-07, "logits/chosen": -2.4080824851989746, "logits/rejected": -2.3765482902526855, "logps/chosen": -462.2020568847656, "logps/rejected": -484.9317321777344, "loss": 0.5981, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.5909029245376587, "rewards/margins": 0.45654287934303284, "rewards/rejected": -2.047445774078369, "step": 6490 }, { "epoch": 0.85, "eval_logits/chosen": -2.2361011505126953, "eval_logits/rejected": -2.2442541122436523, "eval_logps/chosen": -481.1080322265625, "eval_logps/rejected": -493.4491882324219, "eval_loss": 0.6004220247268677, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4835866689682007, "eval_rewards/margins": 0.4398048222064972, "eval_rewards/rejected": -1.9233914613723755, "eval_runtime": 196.7987, "eval_samples_per_second": 10.163, "eval_steps_per_second": 5.081, "step": 6490 }, { "epoch": 0.85, "learning_rate": 3.3208656453799783e-07, "logits/chosen": -2.4739370346069336, "logits/rejected": -2.448183536529541, "logps/chosen": -442.1339416503906, "logps/rejected": -459.9366149902344, "loss": 0.5529, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3421974182128906, "rewards/margins": 0.5246790647506714, "rewards/rejected": -1.8668766021728516, "step": 6500 }, { "epoch": 0.85, "eval_logits/chosen": -2.2351410388946533, "eval_logits/rejected": -2.2432808876037598, "eval_logps/chosen": -481.5740966796875, "eval_logps/rejected": -493.9289855957031, "eval_loss": 0.6004652380943298, "eval_rewards/accuracies": 0.6754999756813049, "eval_rewards/chosen": -1.4882471561431885, "eval_rewards/margins": 0.4399425983428955, "eval_rewards/rejected": -1.928189992904663, "eval_runtime": 197.2487, "eval_samples_per_second": 10.139, "eval_steps_per_second": 5.07, "step": 6500 }, { "epoch": 0.85, "learning_rate": 3.2642066884940064e-07, "logits/chosen": -2.4060733318328857, "logits/rejected": -2.4098830223083496, "logps/chosen": -493.73419189453125, "logps/rejected": -517.33056640625, "loss": 0.6469, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5257792472839355, "rewards/margins": 0.3727341294288635, "rewards/rejected": -1.8985134363174438, "step": 6510 }, { "epoch": 0.85, "eval_logits/chosen": -2.234895944595337, "eval_logits/rejected": -2.242981433868408, "eval_logps/chosen": -481.7478942871094, "eval_logps/rejected": -494.1064758300781, "eval_loss": 0.600500226020813, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4899851083755493, "eval_rewards/margins": 0.4399791359901428, "eval_rewards/rejected": -1.9299641847610474, "eval_runtime": 197.1585, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 6510 }, { "epoch": 0.85, "learning_rate": 3.2080014670975825e-07, "logits/chosen": -2.5220677852630615, "logits/rejected": -2.4988842010498047, "logps/chosen": -455.0575256347656, "logps/rejected": -451.1307067871094, "loss": 0.6271, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4372724294662476, "rewards/margins": 0.333114355802536, "rewards/rejected": -1.7703866958618164, "step": 6520 }, { "epoch": 0.85, "eval_logits/chosen": -2.2341480255126953, "eval_logits/rejected": -2.2423062324523926, "eval_logps/chosen": -482.0383605957031, "eval_logps/rejected": -494.4227294921875, "eval_loss": 0.6005258560180664, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4928892850875854, "eval_rewards/margins": 0.44023728370666504, "eval_rewards/rejected": -1.933126449584961, "eval_runtime": 197.2498, "eval_samples_per_second": 10.139, "eval_steps_per_second": 5.07, "step": 6520 }, { "epoch": 0.85, "learning_rate": 3.152251154476765e-07, "logits/chosen": -2.4268569946289062, "logits/rejected": -2.4182865619659424, "logps/chosen": -450.36834716796875, "logps/rejected": -480.23565673828125, "loss": 0.5816, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.4854668378829956, "rewards/margins": 0.43363720178604126, "rewards/rejected": -1.919103980064392, "step": 6530 }, { "epoch": 0.85, "eval_logits/chosen": -2.234081506729126, "eval_logits/rejected": -2.242432117462158, "eval_logps/chosen": -482.54486083984375, "eval_logps/rejected": -495.0234375, "eval_loss": 0.6005407571792603, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.4979546070098877, "eval_rewards/margins": 0.44117987155914307, "eval_rewards/rejected": -1.9391344785690308, "eval_runtime": 197.2391, "eval_samples_per_second": 10.14, "eval_steps_per_second": 5.07, "step": 6530 }, { "epoch": 0.86, "learning_rate": 3.0969569144214147e-07, "logits/chosen": -2.513247013092041, "logits/rejected": -2.4432804584503174, "logps/chosen": -486.681640625, "logps/rejected": -488.5477600097656, "loss": 0.561, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.462301254272461, "rewards/margins": 0.48198261857032776, "rewards/rejected": -1.9442840814590454, "step": 6540 }, { "epoch": 0.86, "eval_logits/chosen": -2.2322230339050293, "eval_logits/rejected": -2.240504741668701, "eval_logps/chosen": -483.12017822265625, "eval_logps/rejected": -495.6880187988281, "eval_loss": 0.6006953120231628, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.5037076473236084, "eval_rewards/margins": 0.4420722723007202, "eval_rewards/rejected": -1.945779800415039, "eval_runtime": 196.7754, "eval_samples_per_second": 10.164, "eval_steps_per_second": 5.082, "step": 6540 }, { "epoch": 0.86, "learning_rate": 3.042119901200824e-07, "logits/chosen": -2.3795104026794434, "logits/rejected": -2.4172751903533936, "logps/chosen": -436.779052734375, "logps/rejected": -513.870849609375, "loss": 0.5886, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.441890001296997, "rewards/margins": 0.4301987588405609, "rewards/rejected": -1.8720887899398804, "step": 6550 }, { "epoch": 0.86, "eval_logits/chosen": -2.230128288269043, "eval_logits/rejected": -2.2384142875671387, "eval_logps/chosen": -483.65203857421875, "eval_logps/rejected": -496.2925720214844, "eval_loss": 0.6009081602096558, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.5090264081954956, "eval_rewards/margins": 0.44279909133911133, "eval_rewards/rejected": -1.9518253803253174, "eval_runtime": 196.8398, "eval_samples_per_second": 10.161, "eval_steps_per_second": 5.08, "step": 6550 }, { "epoch": 0.86, "learning_rate": 2.9877412595396726e-07, "logits/chosen": -2.5197033882141113, "logits/rejected": -2.546976089477539, "logps/chosen": -532.2468872070312, "logps/rejected": -527.4337158203125, "loss": 0.6015, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.4535622596740723, "rewards/margins": 0.48877015709877014, "rewards/rejected": -1.9423322677612305, "step": 6560 }, { "epoch": 0.86, "eval_logits/chosen": -2.2307028770446777, "eval_logits/rejected": -2.238967180252075, "eval_logps/chosen": -482.97442626953125, "eval_logps/rejected": -495.572021484375, "eval_loss": 0.6008643507957458, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.5022499561309814, "eval_rewards/margins": 0.44237011671066284, "eval_rewards/rejected": -1.9446200132369995, "eval_runtime": 197.0423, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 6560 }, { "epoch": 0.86, "learning_rate": 2.933822124594124e-07, "logits/chosen": -2.4213218688964844, "logits/rejected": -2.343491792678833, "logps/chosen": -468.56561279296875, "logps/rejected": -463.643798828125, "loss": 0.6231, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.473144769668579, "rewards/margins": 0.37735602259635925, "rewards/rejected": -1.8505008220672607, "step": 6570 }, { "epoch": 0.86, "eval_logits/chosen": -2.232693910598755, "eval_logits/rejected": -2.2410120964050293, "eval_logps/chosen": -481.6788635253906, "eval_logps/rejected": -494.13519287109375, "eval_loss": 0.6006700396537781, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4892946481704712, "eval_rewards/margins": 0.4409571588039398, "eval_rewards/rejected": -1.9302517175674438, "eval_runtime": 196.782, "eval_samples_per_second": 10.164, "eval_steps_per_second": 5.082, "step": 6570 }, { "epoch": 0.86, "learning_rate": 2.880363621928106e-07, "logits/chosen": -2.4120044708251953, "logits/rejected": -2.3957927227020264, "logps/chosen": -492.4474182128906, "logps/rejected": -481.00347900390625, "loss": 0.6087, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.5052903890609741, "rewards/margins": 0.3901470899581909, "rewards/rejected": -1.895437479019165, "step": 6580 }, { "epoch": 0.86, "eval_logits/chosen": -2.233774423599243, "eval_logits/rejected": -2.24210524559021, "eval_logps/chosen": -480.93988037109375, "eval_logps/rejected": -493.30426025390625, "eval_loss": 0.6006296277046204, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4819048643112183, "eval_rewards/margins": 0.4400372803211212, "eval_rewards/rejected": -1.921942114830017, "eval_runtime": 196.6703, "eval_samples_per_second": 10.169, "eval_steps_per_second": 5.085, "step": 6580 }, { "epoch": 0.86, "learning_rate": 2.82736686748985e-07, "logits/chosen": -2.4532999992370605, "logits/rejected": -2.3916611671447754, "logps/chosen": -489.4159240722656, "logps/rejected": -455.03265380859375, "loss": 0.5943, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4414112567901611, "rewards/margins": 0.4629778265953064, "rewards/rejected": -1.9043890237808228, "step": 6590 }, { "epoch": 0.86, "eval_logits/chosen": -2.235504388809204, "eval_logits/rejected": -2.243652105331421, "eval_logps/chosen": -481.0194396972656, "eval_logps/rejected": -493.4203186035156, "eval_loss": 0.6003859639167786, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4827001094818115, "eval_rewards/margins": 0.4404028654098511, "eval_rewards/rejected": -1.923102855682373, "eval_runtime": 197.1197, "eval_samples_per_second": 10.146, "eval_steps_per_second": 5.073, "step": 6590 }, { "epoch": 0.86, "learning_rate": 2.774832967588556e-07, "logits/chosen": -2.450917959213257, "logits/rejected": -2.4179370403289795, "logps/chosen": -505.9139099121094, "logps/rejected": -501.72265625, "loss": 0.6125, "rewards/accuracies": 0.625, "rewards/chosen": -1.4615542888641357, "rewards/margins": 0.38382774591445923, "rewards/rejected": -1.8453820943832397, "step": 6600 }, { "epoch": 0.86, "eval_logits/chosen": -2.2341389656066895, "eval_logits/rejected": -2.242079496383667, "eval_logps/chosen": -481.54388427734375, "eval_logps/rejected": -493.9536437988281, "eval_loss": 0.6004937887191772, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.4879448413848877, "eval_rewards/margins": 0.4404914081096649, "eval_rewards/rejected": -1.9284361600875854, "eval_runtime": 197.0336, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 6600 }, { "epoch": 0.86, "learning_rate": 2.7227630188713326e-07, "logits/chosen": -2.489640951156616, "logits/rejected": -2.4417996406555176, "logps/chosen": -524.0570068359375, "logps/rejected": -503.34356689453125, "loss": 0.6032, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4702174663543701, "rewards/margins": 0.5279144048690796, "rewards/rejected": -1.9981319904327393, "step": 6610 }, { "epoch": 0.86, "eval_logits/chosen": -2.235076665878296, "eval_logits/rejected": -2.2431137561798096, "eval_logps/chosen": -481.8366394042969, "eval_logps/rejected": -494.28448486328125, "eval_loss": 0.6003357172012329, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4908719062805176, "eval_rewards/margins": 0.44087329506874084, "eval_rewards/rejected": -1.9317452907562256, "eval_runtime": 196.8034, "eval_samples_per_second": 10.162, "eval_steps_per_second": 5.081, "step": 6610 }, { "epoch": 0.87, "learning_rate": 2.671158108300284e-07, "logits/chosen": -2.5051331520080566, "logits/rejected": -2.4897053241729736, "logps/chosen": -482.470703125, "logps/rejected": -524.2755126953125, "loss": 0.6535, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.655645728111267, "rewards/margins": 0.30718177556991577, "rewards/rejected": -1.9628273248672485, "step": 6620 }, { "epoch": 0.87, "eval_logits/chosen": -2.2355244159698486, "eval_logits/rejected": -2.243473768234253, "eval_logps/chosen": -482.01336669921875, "eval_logps/rejected": -494.4171447753906, "eval_loss": 0.6003087162971497, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.4926397800445557, "eval_rewards/margins": 0.44043198227882385, "eval_rewards/rejected": -1.9330717325210571, "eval_runtime": 196.9961, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 6620 }, { "epoch": 0.87, "learning_rate": 2.6200193131298376e-07, "logits/chosen": -2.515141010284424, "logits/rejected": -2.5287601947784424, "logps/chosen": -498.6051330566406, "logps/rejected": -511.29669189453125, "loss": 0.5795, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4465413093566895, "rewards/margins": 0.5301742553710938, "rewards/rejected": -1.9767156839370728, "step": 6630 }, { "epoch": 0.87, "eval_logits/chosen": -2.2372593879699707, "eval_logits/rejected": -2.2451207637786865, "eval_logps/chosen": -481.894775390625, "eval_logps/rejected": -494.25543212890625, "eval_loss": 0.5999908447265625, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.491453766822815, "eval_rewards/margins": 0.44000041484832764, "eval_rewards/rejected": -1.9314541816711426, "eval_runtime": 197.0556, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 6630 }, { "epoch": 0.87, "learning_rate": 2.569347700884217e-07, "logits/chosen": -2.476605176925659, "logits/rejected": -2.4527993202209473, "logps/chosen": -492.2688903808594, "logps/rejected": -488.52581787109375, "loss": 0.5202, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4653801918029785, "rewards/margins": 0.6056791543960571, "rewards/rejected": -2.071059465408325, "step": 6640 }, { "epoch": 0.87, "eval_logits/chosen": -2.2377192974090576, "eval_logits/rejected": -2.2457220554351807, "eval_logps/chosen": -482.15899658203125, "eval_logps/rejected": -494.47711181640625, "eval_loss": 0.6003398895263672, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.494095802307129, "eval_rewards/margins": 0.4395754337310791, "eval_rewards/rejected": -1.9336711168289185, "eval_runtime": 196.9628, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 6640 }, { "epoch": 0.87, "learning_rate": 2.5191443293352186e-07, "logits/chosen": -2.4760589599609375, "logits/rejected": -2.4655823707580566, "logps/chosen": -502.6334533691406, "logps/rejected": -545.2277221679688, "loss": 0.596, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4817638397216797, "rewards/margins": 0.5151968598365784, "rewards/rejected": -1.9969608783721924, "step": 6650 }, { "epoch": 0.87, "eval_logits/chosen": -2.2369654178619385, "eval_logits/rejected": -2.244926929473877, "eval_logps/chosen": -482.20147705078125, "eval_logps/rejected": -494.5907287597656, "eval_loss": 0.6002098321914673, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.494520902633667, "eval_rewards/margins": 0.4402860999107361, "eval_rewards/rejected": -1.9348070621490479, "eval_runtime": 196.8284, "eval_samples_per_second": 10.161, "eval_steps_per_second": 5.081, "step": 6650 }, { "epoch": 0.87, "learning_rate": 2.469410246480067e-07, "logits/chosen": -2.4040045738220215, "logits/rejected": -2.351503372192383, "logps/chosen": -447.82080078125, "logps/rejected": -474.35174560546875, "loss": 0.5465, "rewards/accuracies": 0.75, "rewards/chosen": -1.53019118309021, "rewards/margins": 0.588119626045227, "rewards/rejected": -2.1183109283447266, "step": 6660 }, { "epoch": 0.87, "eval_logits/chosen": -2.2365779876708984, "eval_logits/rejected": -2.2444467544555664, "eval_logps/chosen": -482.3564758300781, "eval_logps/rejected": -494.7445373535156, "eval_loss": 0.6002839207649231, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.4960702657699585, "eval_rewards/margins": 0.4402748942375183, "eval_rewards/rejected": -1.9363453388214111, "eval_runtime": 197.1637, "eval_samples_per_second": 10.144, "eval_steps_per_second": 5.072, "step": 6660 }, { "epoch": 0.87, "learning_rate": 2.4201464905195955e-07, "logits/chosen": -2.543325185775757, "logits/rejected": -2.540952205657959, "logps/chosen": -471.36322021484375, "logps/rejected": -488.7928771972656, "loss": 0.6745, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.5087602138519287, "rewards/margins": 0.25316157937049866, "rewards/rejected": -1.7619216442108154, "step": 6670 }, { "epoch": 0.87, "eval_logits/chosen": -2.236558675765991, "eval_logits/rejected": -2.244509696960449, "eval_logps/chosen": -482.4366149902344, "eval_logps/rejected": -494.8221435546875, "eval_loss": 0.6003116965293884, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.496871829032898, "eval_rewards/margins": 0.44024935364723206, "eval_rewards/rejected": -1.9371213912963867, "eval_runtime": 197.0333, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 6670 }, { "epoch": 0.87, "learning_rate": 2.3713540898365196e-07, "logits/chosen": -2.4039158821105957, "logits/rejected": -2.384819507598877, "logps/chosen": -464.6339416503906, "logps/rejected": -482.588623046875, "loss": 0.5327, "rewards/accuracies": 0.75, "rewards/chosen": -1.373579740524292, "rewards/margins": 0.5761127471923828, "rewards/rejected": -1.9496924877166748, "step": 6680 }, { "epoch": 0.87, "eval_logits/chosen": -2.23722767829895, "eval_logits/rejected": -2.2453808784484863, "eval_logps/chosen": -482.1029052734375, "eval_logps/rejected": -494.4505310058594, "eval_loss": 0.6002626419067383, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.4935351610183716, "eval_rewards/margins": 0.4398702085018158, "eval_rewards/rejected": -1.9334051609039307, "eval_runtime": 196.8241, "eval_samples_per_second": 10.161, "eval_steps_per_second": 5.081, "step": 6680 }, { "epoch": 0.88, "learning_rate": 2.3230340629740166e-07, "logits/chosen": -2.5268912315368652, "logits/rejected": -2.479428291320801, "logps/chosen": -470.13348388671875, "logps/rejected": -472.2806701660156, "loss": 0.6052, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.431485891342163, "rewards/margins": 0.33059996366500854, "rewards/rejected": -1.7620859146118164, "step": 6690 }, { "epoch": 0.88, "eval_logits/chosen": -2.2347989082336426, "eval_logits/rejected": -2.2428057193756104, "eval_logps/chosen": -482.34716796875, "eval_logps/rejected": -494.74395751953125, "eval_loss": 0.6002459526062012, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4959776401519775, "eval_rewards/margins": 0.44036149978637695, "eval_rewards/rejected": -1.936339259147644, "eval_runtime": 196.8663, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.08, "step": 6690 }, { "epoch": 0.88, "learning_rate": 2.2751874186144357e-07, "logits/chosen": -2.497739315032959, "logits/rejected": -2.468701124191284, "logps/chosen": -498.406005859375, "logps/rejected": -471.77764892578125, "loss": 0.6264, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.3730546236038208, "rewards/margins": 0.34841588139533997, "rewards/rejected": -1.7214704751968384, "step": 6700 }, { "epoch": 0.88, "eval_logits/chosen": -2.235518217086792, "eval_logits/rejected": -2.243518590927124, "eval_logps/chosen": -482.25439453125, "eval_logps/rejected": -494.6637878417969, "eval_loss": 0.600059986114502, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -1.4950499534606934, "eval_rewards/margins": 0.44048792123794556, "eval_rewards/rejected": -1.9355378150939941, "eval_runtime": 197.1099, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.073, "step": 6700 }, { "epoch": 0.88, "learning_rate": 2.227815155558241e-07, "logits/chosen": -2.5343174934387207, "logits/rejected": -2.5697665214538574, "logps/chosen": -496.79345703125, "logps/rejected": -521.640869140625, "loss": 0.599, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.47087824344635, "rewards/margins": 0.46131792664527893, "rewards/rejected": -1.9321959018707275, "step": 6710 }, { "epoch": 0.88, "eval_logits/chosen": -2.2363016605377197, "eval_logits/rejected": -2.2442235946655273, "eval_logps/chosen": -481.9609680175781, "eval_logps/rejected": -494.30633544921875, "eval_loss": 0.6001153588294983, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.4921154975891113, "eval_rewards/margins": 0.4398481845855713, "eval_rewards/rejected": -1.9319636821746826, "eval_runtime": 196.858, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 6710 }, { "epoch": 0.88, "learning_rate": 2.1809182627031883e-07, "logits/chosen": -2.5412440299987793, "logits/rejected": -2.4768128395080566, "logps/chosen": -498.4740295410156, "logps/rejected": -514.843505859375, "loss": 0.5517, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4493227005004883, "rewards/margins": 0.5438094139099121, "rewards/rejected": -1.9931319952011108, "step": 6720 }, { "epoch": 0.88, "eval_logits/chosen": -2.2361483573913574, "eval_logits/rejected": -2.243964195251465, "eval_logps/chosen": -482.1773681640625, "eval_logps/rejected": -494.5125732421875, "eval_loss": 0.6001316905021667, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.4942797422409058, "eval_rewards/margins": 0.4397459924221039, "eval_rewards/rejected": -1.934025764465332, "eval_runtime": 196.9792, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 6720 }, { "epoch": 0.88, "learning_rate": 2.1344977190236372e-07, "logits/chosen": -2.3600761890411377, "logits/rejected": -2.283154249191284, "logps/chosen": -441.1805725097656, "logps/rejected": -480.2210998535156, "loss": 0.6085, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4329464435577393, "rewards/margins": 0.3956843316555023, "rewards/rejected": -1.828630805015564, "step": 6730 }, { "epoch": 0.88, "eval_logits/chosen": -2.235147714614868, "eval_logits/rejected": -2.242946147918701, "eval_logps/chosen": -482.44781494140625, "eval_logps/rejected": -494.79400634765625, "eval_loss": 0.6002436876296997, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4969840049743652, "eval_rewards/margins": 0.4398559629917145, "eval_rewards/rejected": -1.9368400573730469, "eval_runtime": 196.9267, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 6730 }, { "epoch": 0.88, "learning_rate": 2.0885544935501656e-07, "logits/chosen": -2.467778444290161, "logits/rejected": -2.52734637260437, "logps/chosen": -444.9013671875, "logps/rejected": -490.29522705078125, "loss": 0.5446, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.3631761074066162, "rewards/margins": 0.5013788342475891, "rewards/rejected": -1.86455500125885, "step": 6740 }, { "epoch": 0.88, "eval_logits/chosen": -2.2349860668182373, "eval_logits/rejected": -2.2425625324249268, "eval_logps/chosen": -482.5580749511719, "eval_logps/rejected": -494.9825134277344, "eval_loss": 0.6000815629959106, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4980865716934204, "eval_rewards/margins": 0.4406384229660034, "eval_rewards/rejected": -1.9387251138687134, "eval_runtime": 197.4709, "eval_samples_per_second": 10.128, "eval_steps_per_second": 5.064, "step": 6740 }, { "epoch": 0.88, "learning_rate": 2.0430895453492944e-07, "logits/chosen": -2.446242570877075, "logits/rejected": -2.4823126792907715, "logps/chosen": -528.4280395507812, "logps/rejected": -505.59527587890625, "loss": 0.6626, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.5998499393463135, "rewards/margins": 0.24394333362579346, "rewards/rejected": -1.843793511390686, "step": 6750 }, { "epoch": 0.88, "eval_logits/chosen": -2.2358670234680176, "eval_logits/rejected": -2.2437267303466797, "eval_logps/chosen": -482.26654052734375, "eval_logps/rejected": -494.6257629394531, "eval_loss": 0.6001067757606506, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.4951714277267456, "eval_rewards/margins": 0.4399857223033905, "eval_rewards/rejected": -1.9351569414138794, "eval_runtime": 197.0616, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 6750 }, { "epoch": 0.88, "learning_rate": 1.9981038235035111e-07, "logits/chosen": -2.442606210708618, "logits/rejected": -2.4387025833129883, "logps/chosen": -446.9200744628906, "logps/rejected": -488.510009765625, "loss": 0.5305, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.3141353130340576, "rewards/margins": 0.5878725051879883, "rewards/rejected": -1.9020076990127563, "step": 6760 }, { "epoch": 0.88, "eval_logits/chosen": -2.2357892990112305, "eval_logits/rejected": -2.2436020374298096, "eval_logps/chosen": -482.08953857421875, "eval_logps/rejected": -494.4461669921875, "eval_loss": 0.60005122423172, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4934011697769165, "eval_rewards/margins": 0.43996042013168335, "eval_rewards/rejected": -1.933361530303955, "eval_runtime": 196.9804, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 6760 }, { "epoch": 0.89, "learning_rate": 1.9535982670914112e-07, "logits/chosen": -2.3814468383789062, "logits/rejected": -2.3742775917053223, "logps/chosen": -506.7943420410156, "logps/rejected": -512.1139526367188, "loss": 0.5956, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.455214262008667, "rewards/margins": 0.4944564700126648, "rewards/rejected": -1.9496707916259766, "step": 6770 }, { "epoch": 0.89, "eval_logits/chosen": -2.236239433288574, "eval_logits/rejected": -2.244074821472168, "eval_logps/chosen": -481.68572998046875, "eval_logps/rejected": -493.983154296875, "eval_loss": 0.6000664234161377, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4893630743026733, "eval_rewards/margins": 0.4393681585788727, "eval_rewards/rejected": -1.9287313222885132, "eval_runtime": 196.9554, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 6770 }, { "epoch": 0.89, "learning_rate": 1.9095738051681412e-07, "logits/chosen": -2.392882823944092, "logits/rejected": -2.3796443939208984, "logps/chosen": -444.6842346191406, "logps/rejected": -494.61102294921875, "loss": 0.5703, "rewards/accuracies": 0.75, "rewards/chosen": -1.58041250705719, "rewards/margins": 0.47645503282546997, "rewards/rejected": -2.0568675994873047, "step": 6780 }, { "epoch": 0.89, "eval_logits/chosen": -2.2347044944763184, "eval_logits/rejected": -2.2426021099090576, "eval_logps/chosen": -481.5654296875, "eval_logps/rejected": -493.9347839355469, "eval_loss": 0.6000974178314209, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.4881603717803955, "eval_rewards/margins": 0.4400874078273773, "eval_rewards/rejected": -1.9282478094100952, "eval_runtime": 197.2369, "eval_samples_per_second": 10.14, "eval_steps_per_second": 5.07, "step": 6780 }, { "epoch": 0.89, "learning_rate": 1.8660313567459703e-07, "logits/chosen": -2.4689860343933105, "logits/rejected": -2.507202386856079, "logps/chosen": -423.113037109375, "logps/rejected": -489.84747314453125, "loss": 0.519, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3348274230957031, "rewards/margins": 0.7304555177688599, "rewards/rejected": -2.0652830600738525, "step": 6790 }, { "epoch": 0.89, "eval_logits/chosen": -2.234759569168091, "eval_logits/rejected": -2.242676019668579, "eval_logps/chosen": -481.4300842285156, "eval_logps/rejected": -493.7626953125, "eval_loss": 0.6002135276794434, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -1.486806869506836, "eval_rewards/margins": 0.43971991539001465, "eval_rewards/rejected": -1.9265269041061401, "eval_runtime": 196.7938, "eval_samples_per_second": 10.163, "eval_steps_per_second": 5.081, "step": 6790 }, { "epoch": 0.89, "learning_rate": 1.8229718307751165e-07, "logits/chosen": -2.5024523735046387, "logits/rejected": -2.4301934242248535, "logps/chosen": -508.9176330566406, "logps/rejected": -503.2557678222656, "loss": 0.5557, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.5061525106430054, "rewards/margins": 0.6268715858459473, "rewards/rejected": -2.133024215698242, "step": 6800 }, { "epoch": 0.89, "eval_logits/chosen": -2.235121011734009, "eval_logits/rejected": -2.243008852005005, "eval_logps/chosen": -480.85699462890625, "eval_logps/rejected": -493.17816162109375, "eval_loss": 0.6001080274581909, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.4810760021209717, "eval_rewards/margins": 0.43960532546043396, "eval_rewards/rejected": -1.920681118965149, "eval_runtime": 196.9579, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 6800 }, { "epoch": 0.89, "learning_rate": 1.7803961261247864e-07, "logits/chosen": -2.397812604904175, "logits/rejected": -2.4298148155212402, "logps/chosen": -493.19952392578125, "logps/rejected": -522.0126953125, "loss": 0.5928, "rewards/accuracies": 0.75, "rewards/chosen": -1.461112380027771, "rewards/margins": 0.4707748293876648, "rewards/rejected": -1.9318872690200806, "step": 6810 }, { "epoch": 0.89, "eval_logits/chosen": -2.2350032329559326, "eval_logits/rejected": -2.242875576019287, "eval_logps/chosen": -480.5160217285156, "eval_logps/rejected": -492.8398132324219, "eval_loss": 0.6000543236732483, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4776662588119507, "eval_rewards/margins": 0.43963193893432617, "eval_rewards/rejected": -1.9172983169555664, "eval_runtime": 197.0366, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 6810 }, { "epoch": 0.89, "learning_rate": 1.7383051315643772e-07, "logits/chosen": -2.451185703277588, "logits/rejected": -2.4309628009796143, "logps/chosen": -506.1298828125, "logps/rejected": -492.93927001953125, "loss": 0.6184, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.559695839881897, "rewards/margins": 0.43002885580062866, "rewards/rejected": -1.9897247552871704, "step": 6820 }, { "epoch": 0.89, "eval_logits/chosen": -2.2355458736419678, "eval_logits/rejected": -2.243511199951172, "eval_logps/chosen": -480.4143981933594, "eval_logps/rejected": -492.6844482421875, "eval_loss": 0.600212037563324, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4766501188278198, "eval_rewards/margins": 0.4390944242477417, "eval_rewards/rejected": -1.9157445430755615, "eval_runtime": 197.0764, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 6820 }, { "epoch": 0.89, "learning_rate": 1.6966997257449685e-07, "logits/chosen": -2.4615304470062256, "logits/rejected": -2.423633098602295, "logps/chosen": -487.74188232421875, "logps/rejected": -501.6026306152344, "loss": 0.6065, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.4967652559280396, "rewards/margins": 0.42426905035972595, "rewards/rejected": -1.9210344552993774, "step": 6830 }, { "epoch": 0.89, "eval_logits/chosen": -2.2352402210235596, "eval_logits/rejected": -2.2431232929229736, "eval_logps/chosen": -480.4826354980469, "eval_logps/rejected": -492.7511901855469, "eval_loss": 0.6002153754234314, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.4773321151733398, "eval_rewards/margins": 0.4390796720981598, "eval_rewards/rejected": -1.9164117574691772, "eval_runtime": 196.8292, "eval_samples_per_second": 10.161, "eval_steps_per_second": 5.081, "step": 6830 }, { "epoch": 0.9, "learning_rate": 1.6555807771809375e-07, "logits/chosen": -2.443737506866455, "logits/rejected": -2.424933910369873, "logps/chosen": -455.24761962890625, "logps/rejected": -443.237060546875, "loss": 0.5943, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4251893758773804, "rewards/margins": 0.5287860035896301, "rewards/rejected": -1.9539753198623657, "step": 6840 }, { "epoch": 0.9, "eval_logits/chosen": -2.2357749938964844, "eval_logits/rejected": -2.2437076568603516, "eval_logps/chosen": -480.072021484375, "eval_logps/rejected": -492.3343811035156, "eval_loss": 0.5999860763549805, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -1.4732260704040527, "eval_rewards/margins": 0.43901708722114563, "eval_rewards/rejected": -1.912243127822876, "eval_runtime": 197.0688, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.074, "step": 6840 }, { "epoch": 0.9, "learning_rate": 1.6149491442318617e-07, "logits/chosen": -2.4913601875305176, "logits/rejected": -2.4751856327056885, "logps/chosen": -459.0533752441406, "logps/rejected": -494.43646240234375, "loss": 0.6122, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4198737144470215, "rewards/margins": 0.444546639919281, "rewards/rejected": -1.8644202947616577, "step": 6850 }, { "epoch": 0.9, "eval_logits/chosen": -2.2358829975128174, "eval_logits/rejected": -2.2437386512756348, "eval_logps/chosen": -479.94873046875, "eval_logps/rejected": -492.1989440917969, "eval_loss": 0.5999601483345032, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.4719932079315186, "eval_rewards/margins": 0.43889597058296204, "eval_rewards/rejected": -1.9108891487121582, "eval_runtime": 196.9527, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 6850 }, { "epoch": 0.9, "learning_rate": 1.5748056750845786e-07, "logits/chosen": -2.4793450832366943, "logits/rejected": -2.4470067024230957, "logps/chosen": -486.44891357421875, "logps/rejected": -461.64312744140625, "loss": 0.5781, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5266351699829102, "rewards/margins": 0.48350292444229126, "rewards/rejected": -2.0101380348205566, "step": 6860 }, { "epoch": 0.9, "eval_logits/chosen": -2.2366299629211426, "eval_logits/rejected": -2.2447266578674316, "eval_logps/chosen": -479.8468017578125, "eval_logps/rejected": -492.0443420410156, "eval_loss": 0.6001272201538086, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.4709739685058594, "eval_rewards/margins": 0.4383690655231476, "eval_rewards/rejected": -1.9093430042266846, "eval_runtime": 196.8817, "eval_samples_per_second": 10.158, "eval_steps_per_second": 5.079, "step": 6860 }, { "epoch": 0.9, "learning_rate": 1.5351512077355024e-07, "logits/chosen": -2.428464412689209, "logits/rejected": -2.386335849761963, "logps/chosen": -524.8412475585938, "logps/rejected": -589.8968505859375, "loss": 0.5874, "rewards/accuracies": 0.75, "rewards/chosen": -1.4802411794662476, "rewards/margins": 0.5046831965446472, "rewards/rejected": -1.984924554824829, "step": 6870 }, { "epoch": 0.9, "eval_logits/chosen": -2.2367382049560547, "eval_logits/rejected": -2.244694471359253, "eval_logps/chosen": -480.0219421386719, "eval_logps/rejected": -492.270263671875, "eval_loss": 0.5999786853790283, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -1.47272527217865, "eval_rewards/margins": 0.4388763904571533, "eval_rewards/rejected": -1.9116017818450928, "eval_runtime": 197.036, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 6870 }, { "epoch": 0.9, "learning_rate": 1.4959865699730902e-07, "logits/chosen": -2.414353847503662, "logits/rejected": -2.3764188289642334, "logps/chosen": -447.9195861816406, "logps/rejected": -450.7298889160156, "loss": 0.5447, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.5458955764770508, "rewards/margins": 0.5172951817512512, "rewards/rejected": -2.0631909370422363, "step": 6880 }, { "epoch": 0.9, "eval_logits/chosen": -2.236314058303833, "eval_logits/rejected": -2.244189977645874, "eval_logps/chosen": -480.1126708984375, "eval_logps/rejected": -492.3921203613281, "eval_loss": 0.5999928116798401, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.4736328125, "eval_rewards/margins": 0.43918824195861816, "eval_rewards/rejected": -1.9128209352493286, "eval_runtime": 196.9069, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 6880 }, { "epoch": 0.9, "learning_rate": 1.4573125793606202e-07, "logits/chosen": -2.4773974418640137, "logits/rejected": -2.478883743286133, "logps/chosen": -425.7857360839844, "logps/rejected": -457.80084228515625, "loss": 0.6112, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.4574798345565796, "rewards/margins": 0.476410448551178, "rewards/rejected": -1.9338905811309814, "step": 6890 }, { "epoch": 0.9, "eval_logits/chosen": -2.2367420196533203, "eval_logits/rejected": -2.2447338104248047, "eval_logps/chosen": -480.1895751953125, "eval_logps/rejected": -492.44073486328125, "eval_loss": 0.5999522805213928, "eval_rewards/accuracies": 0.6759999990463257, "eval_rewards/chosen": -1.4744013547897339, "eval_rewards/margins": 0.43890616297721863, "eval_rewards/rejected": -1.913307547569275, "eval_runtime": 196.8113, "eval_samples_per_second": 10.162, "eval_steps_per_second": 5.081, "step": 6890 }, { "epoch": 0.9, "learning_rate": 1.4191300432190634e-07, "logits/chosen": -2.407351016998291, "logits/rejected": -2.36082124710083, "logps/chosen": -492.70574951171875, "logps/rejected": -501.9463806152344, "loss": 0.6134, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.5695149898529053, "rewards/margins": 0.4132605195045471, "rewards/rejected": -1.9827754497528076, "step": 6900 }, { "epoch": 0.9, "eval_logits/chosen": -2.2375595569610596, "eval_logits/rejected": -2.245429754257202, "eval_logps/chosen": -479.95867919921875, "eval_logps/rejected": -492.1669616699219, "eval_loss": 0.5999838709831238, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4720933437347412, "eval_rewards/margins": 0.43847644329071045, "eval_rewards/rejected": -1.910569667816162, "eval_runtime": 196.8622, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.08, "step": 6900 }, { "epoch": 0.9, "learning_rate": 1.381439758610284e-07, "logits/chosen": -2.4294683933258057, "logits/rejected": -2.388927936553955, "logps/chosen": -458.1944274902344, "logps/rejected": -468.96124267578125, "loss": 0.5998, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.3803436756134033, "rewards/margins": 0.33844703435897827, "rewards/rejected": -1.7187906503677368, "step": 6910 }, { "epoch": 0.9, "eval_logits/chosen": -2.2376015186309814, "eval_logits/rejected": -2.245645523071289, "eval_logps/chosen": -480.1687927246094, "eval_logps/rejected": -492.4361267089844, "eval_loss": 0.5998128652572632, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.4741934537887573, "eval_rewards/margins": 0.4390679597854614, "eval_rewards/rejected": -1.9132615327835083, "eval_runtime": 197.0022, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 6910 }, { "epoch": 0.91, "learning_rate": 1.3442425123203596e-07, "logits/chosen": -2.542816638946533, "logits/rejected": -2.5520670413970947, "logps/chosen": -458.94775390625, "logps/rejected": -500.8182678222656, "loss": 0.5518, "rewards/accuracies": 0.75, "rewards/chosen": -1.4125124216079712, "rewards/margins": 0.5205521583557129, "rewards/rejected": -1.9330646991729736, "step": 6920 }, { "epoch": 0.91, "eval_logits/chosen": -2.2370002269744873, "eval_logits/rejected": -2.244837999343872, "eval_logps/chosen": -480.6543884277344, "eval_logps/rejected": -492.9961853027344, "eval_loss": 0.5997794270515442, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4790493249893188, "eval_rewards/margins": 0.4398118257522583, "eval_rewards/rejected": -1.9188611507415771, "eval_runtime": 196.9578, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 6920 }, { "epoch": 0.91, "learning_rate": 1.3075390808431897e-07, "logits/chosen": -2.33107328414917, "logits/rejected": -2.374955654144287, "logps/chosen": -438.92376708984375, "logps/rejected": -458.869873046875, "loss": 0.5815, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4029204845428467, "rewards/margins": 0.5280667543411255, "rewards/rejected": -1.9309873580932617, "step": 6930 }, { "epoch": 0.91, "eval_logits/chosen": -2.2373814582824707, "eval_logits/rejected": -2.2452640533447266, "eval_logps/chosen": -480.60675048828125, "eval_logps/rejected": -492.9164733886719, "eval_loss": 0.5998906493186951, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4785739183425903, "eval_rewards/margins": 0.43949049711227417, "eval_rewards/rejected": -1.9180644750595093, "eval_runtime": 197.0525, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 6930 }, { "epoch": 0.91, "learning_rate": 1.271330230364262e-07, "logits/chosen": -2.484471559524536, "logits/rejected": -2.485959529876709, "logps/chosen": -447.9771423339844, "logps/rejected": -548.21630859375, "loss": 0.5728, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.4104546308517456, "rewards/margins": 0.5502049326896667, "rewards/rejected": -1.9606596231460571, "step": 6940 }, { "epoch": 0.91, "eval_logits/chosen": -2.237180709838867, "eval_logits/rejected": -2.2449331283569336, "eval_logps/chosen": -480.6366271972656, "eval_logps/rejected": -492.9146728515625, "eval_loss": 0.600125253200531, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.478872299194336, "eval_rewards/margins": 0.4391743242740631, "eval_rewards/rejected": -1.9180465936660767, "eval_runtime": 196.7765, "eval_samples_per_second": 10.164, "eval_steps_per_second": 5.082, "step": 6940 }, { "epoch": 0.91, "learning_rate": 1.2356167167446698e-07, "logits/chosen": -2.468034029006958, "logits/rejected": -2.458634853363037, "logps/chosen": -452.5840759277344, "logps/rejected": -506.529541015625, "loss": 0.6253, "rewards/accuracies": 0.625, "rewards/chosen": -1.5923188924789429, "rewards/margins": 0.39848339557647705, "rewards/rejected": -1.9908021688461304, "step": 6950 }, { "epoch": 0.91, "eval_logits/chosen": -2.2377233505249023, "eval_logits/rejected": -2.245729446411133, "eval_logps/chosen": -480.3329772949219, "eval_logps/rejected": -492.6123046875, "eval_loss": 0.5999380946159363, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.4758356809616089, "eval_rewards/margins": 0.43918731808662415, "eval_rewards/rejected": -1.9150229692459106, "eval_runtime": 196.8314, "eval_samples_per_second": 10.161, "eval_steps_per_second": 5.08, "step": 6950 }, { "epoch": 0.91, "learning_rate": 1.2003992855053326e-07, "logits/chosen": -2.441638231277466, "logits/rejected": -2.3864188194274902, "logps/chosen": -437.62322998046875, "logps/rejected": -482.9440002441406, "loss": 0.5998, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4483708143234253, "rewards/margins": 0.5788255929946899, "rewards/rejected": -2.0271964073181152, "step": 6960 }, { "epoch": 0.91, "eval_logits/chosen": -2.2372143268585205, "eval_logits/rejected": -2.245234966278076, "eval_logps/chosen": -480.1387634277344, "eval_logps/rejected": -492.3878173828125, "eval_loss": 0.5998957753181458, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.473893404006958, "eval_rewards/margins": 0.4388843774795532, "eval_rewards/rejected": -1.9127776622772217, "eval_runtime": 196.9899, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.076, "step": 6960 }, { "epoch": 0.91, "learning_rate": 1.1656786718114239e-07, "logits/chosen": -2.410566806793213, "logits/rejected": -2.415010690689087, "logps/chosen": -461.14923095703125, "logps/rejected": -489.13330078125, "loss": 0.6105, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4670493602752686, "rewards/margins": 0.4196097254753113, "rewards/rejected": -1.886659026145935, "step": 6970 }, { "epoch": 0.91, "eval_logits/chosen": -2.2372076511383057, "eval_logits/rejected": -2.2453556060791016, "eval_logps/chosen": -479.9434509277344, "eval_logps/rejected": -492.099365234375, "eval_loss": 0.6001380681991577, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4719403982162476, "eval_rewards/margins": 0.43795305490493774, "eval_rewards/rejected": -1.909893274307251, "eval_runtime": 197.05, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 6970 }, { "epoch": 0.91, "learning_rate": 1.1314556004570487e-07, "logits/chosen": -2.394918918609619, "logits/rejected": -2.4230122566223145, "logps/chosen": -413.9336853027344, "logps/rejected": -471.86785888671875, "loss": 0.6255, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4994906187057495, "rewards/margins": 0.32537388801574707, "rewards/rejected": -1.824864387512207, "step": 6980 }, { "epoch": 0.91, "eval_logits/chosen": -2.2366092205047607, "eval_logits/rejected": -2.2446200847625732, "eval_logps/chosen": -480.2217102050781, "eval_logps/rejected": -492.47747802734375, "eval_loss": 0.6001001000404358, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.474722981452942, "eval_rewards/margins": 0.4389515519142151, "eval_rewards/rejected": -1.9136745929718018, "eval_runtime": 197.1272, "eval_samples_per_second": 10.146, "eval_steps_per_second": 5.073, "step": 6980 }, { "epoch": 0.91, "learning_rate": 1.0977307858500818e-07, "logits/chosen": -2.392697811126709, "logits/rejected": -2.3592922687530518, "logps/chosen": -450.70721435546875, "logps/rejected": -447.18206787109375, "loss": 0.5663, "rewards/accuracies": 0.75, "rewards/chosen": -1.3413830995559692, "rewards/margins": 0.42927223443984985, "rewards/rejected": -1.7706553936004639, "step": 6990 }, { "epoch": 0.91, "eval_logits/chosen": -2.237020969390869, "eval_logits/rejected": -2.244997024536133, "eval_logps/chosen": -480.3205261230469, "eval_logps/rejected": -492.5836181640625, "eval_loss": 0.6000609993934631, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.4757108688354492, "eval_rewards/margins": 0.43902501463890076, "eval_rewards/rejected": -1.9147359132766724, "eval_runtime": 196.8134, "eval_samples_per_second": 10.162, "eval_steps_per_second": 5.081, "step": 6990 }, { "epoch": 0.92, "learning_rate": 1.0645049319972789e-07, "logits/chosen": -2.440504550933838, "logits/rejected": -2.380981922149658, "logps/chosen": -461.13299560546875, "logps/rejected": -475.28173828125, "loss": 0.5424, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4061723947525024, "rewards/margins": 0.6738840937614441, "rewards/rejected": -2.080056667327881, "step": 7000 }, { "epoch": 0.92, "eval_logits/chosen": -2.237301826477051, "eval_logits/rejected": -2.2453017234802246, "eval_logps/chosen": -480.3714904785156, "eval_logps/rejected": -492.61669921875, "eval_loss": 0.600178599357605, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -1.4762206077575684, "eval_rewards/margins": 0.4388462007045746, "eval_rewards/rejected": -1.9150665998458862, "eval_runtime": 196.9518, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 7000 }, { "epoch": 0.92, "learning_rate": 1.0317787324895634e-07, "logits/chosen": -2.4781394004821777, "logits/rejected": -2.4770684242248535, "logps/chosen": -523.5897827148438, "logps/rejected": -511.9645080566406, "loss": 0.596, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.571276307106018, "rewards/margins": 0.4954506456851959, "rewards/rejected": -2.0667271614074707, "step": 7010 }, { "epoch": 0.92, "eval_logits/chosen": -2.237297296524048, "eval_logits/rejected": -2.24528169631958, "eval_logps/chosen": -480.1894226074219, "eval_logps/rejected": -492.4363708496094, "eval_loss": 0.6001022458076477, "eval_rewards/accuracies": 0.671500027179718, "eval_rewards/chosen": -1.474400520324707, "eval_rewards/margins": 0.43886318802833557, "eval_rewards/rejected": -1.9132635593414307, "eval_runtime": 196.8647, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.08, "step": 7010 }, { "epoch": 0.92, "learning_rate": 9.995528704875635e-08, "logits/chosen": -2.4749293327331543, "logits/rejected": -2.5007224082946777, "logps/chosen": -449.7769470214844, "logps/rejected": -507.34991455078125, "loss": 0.6293, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.5626052618026733, "rewards/margins": 0.3416301906108856, "rewards/rejected": -1.9042352437973022, "step": 7020 }, { "epoch": 0.92, "eval_logits/chosen": -2.2375288009643555, "eval_logits/rejected": -2.2456140518188477, "eval_logps/chosen": -480.08721923828125, "eval_logps/rejected": -492.32904052734375, "eval_loss": 0.6000053286552429, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4733775854110718, "eval_rewards/margins": 0.4388121962547302, "eval_rewards/rejected": -1.9121898412704468, "eval_runtime": 197.1161, "eval_samples_per_second": 10.146, "eval_steps_per_second": 5.073, "step": 7020 }, { "epoch": 0.92, "learning_rate": 9.678280187073452e-08, "logits/chosen": -2.376216173171997, "logits/rejected": -2.4128124713897705, "logps/chosen": -457.071533203125, "logps/rejected": -477.45062255859375, "loss": 0.5241, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.2783294916152954, "rewards/margins": 0.6212576031684875, "rewards/rejected": -1.8995869159698486, "step": 7030 }, { "epoch": 0.92, "eval_logits/chosen": -2.237093687057495, "eval_logits/rejected": -2.2451350688934326, "eval_logps/chosen": -480.2182922363281, "eval_logps/rejected": -492.4925537109375, "eval_loss": 0.6000233888626099, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.4746882915496826, "eval_rewards/margins": 0.43913722038269043, "eval_rewards/rejected": -1.913825273513794, "eval_runtime": 197.0562, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.075, "step": 7030 }, { "epoch": 0.92, "learning_rate": 9.366048394063549e-08, "logits/chosen": -2.531467914581299, "logits/rejected": -2.4971137046813965, "logps/chosen": -474.6029357910156, "logps/rejected": -536.8944091796875, "loss": 0.5432, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.343653678894043, "rewards/margins": 0.6326344013214111, "rewards/rejected": -1.976288080215454, "step": 7040 }, { "epoch": 0.92, "eval_logits/chosen": -2.2370364665985107, "eval_logits/rejected": -2.2448863983154297, "eval_logps/chosen": -480.6302490234375, "eval_logps/rejected": -492.96478271484375, "eval_loss": 0.5998647809028625, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4788081645965576, "eval_rewards/margins": 0.4397394359111786, "eval_rewards/rejected": -1.9185476303100586, "eval_runtime": 197.1021, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.074, "step": 7040 }, { "epoch": 0.92, "learning_rate": 9.058839843696237e-08, "logits/chosen": -2.4858269691467285, "logits/rejected": -2.4405789375305176, "logps/chosen": -480.81610107421875, "logps/rejected": -499.29461669921875, "loss": 0.5755, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.399253010749817, "rewards/margins": 0.5345474481582642, "rewards/rejected": -1.933800458908081, "step": 7050 }, { "epoch": 0.92, "eval_logits/chosen": -2.2368223667144775, "eval_logits/rejected": -2.244842767715454, "eval_logps/chosen": -480.903564453125, "eval_logps/rejected": -493.2392272949219, "eval_loss": 0.6001678705215454, "eval_rewards/accuracies": 0.671500027179718, "eval_rewards/chosen": -1.4815417528152466, "eval_rewards/margins": 0.439750075340271, "eval_rewards/rejected": -1.9212918281555176, "eval_runtime": 197.05, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 7050 }, { "epoch": 0.92, "learning_rate": 8.756660948961299e-08, "logits/chosen": -2.4491777420043945, "logits/rejected": -2.463347911834717, "logps/chosen": -449.61077880859375, "logps/rejected": -495.1546325683594, "loss": 0.6344, "rewards/accuracies": 0.625, "rewards/chosen": -1.45897376537323, "rewards/margins": 0.3097809851169586, "rewards/rejected": -1.7687549591064453, "step": 7060 }, { "epoch": 0.92, "eval_logits/chosen": -2.236321210861206, "eval_logits/rejected": -2.244218349456787, "eval_logps/chosen": -481.0281066894531, "eval_logps/rejected": -493.4416809082031, "eval_loss": 0.5998026132583618, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.482786774635315, "eval_rewards/margins": 0.4405299723148346, "eval_rewards/rejected": -1.9233167171478271, "eval_runtime": 197.0403, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 7060 }, { "epoch": 0.93, "learning_rate": 8.459518017854412e-08, "logits/chosen": -2.436307907104492, "logits/rejected": -2.4082484245300293, "logps/chosen": -488.6837463378906, "logps/rejected": -466.79510498046875, "loss": 0.659, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.5326035022735596, "rewards/margins": 0.22640573978424072, "rewards/rejected": -1.7590093612670898, "step": 7070 }, { "epoch": 0.93, "eval_logits/chosen": -2.23673677444458, "eval_logits/rejected": -2.2447004318237305, "eval_logps/chosen": -481.2086181640625, "eval_logps/rejected": -493.5932312011719, "eval_loss": 0.5999945998191833, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.484592318534851, "eval_rewards/margins": 0.44023993611335754, "eval_rewards/rejected": -1.9248321056365967, "eval_runtime": 197.134, "eval_samples_per_second": 10.145, "eval_steps_per_second": 5.073, "step": 7070 }, { "epoch": 0.93, "learning_rate": 8.167417253245213e-08, "logits/chosen": -2.4186267852783203, "logits/rejected": -2.365056037902832, "logps/chosen": -457.72088623046875, "logps/rejected": -458.7981872558594, "loss": 0.5947, "rewards/accuracies": 0.625, "rewards/chosen": -1.4014866352081299, "rewards/margins": 0.3693445324897766, "rewards/rejected": -1.7708311080932617, "step": 7080 }, { "epoch": 0.93, "eval_logits/chosen": -2.235661268234253, "eval_logits/rejected": -2.2434422969818115, "eval_logps/chosen": -481.47943115234375, "eval_logps/rejected": -493.9435729980469, "eval_loss": 0.5998866558074951, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.487300157546997, "eval_rewards/margins": 0.44103503227233887, "eval_rewards/rejected": -1.928335189819336, "eval_runtime": 197.0791, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 7080 }, { "epoch": 0.93, "learning_rate": 7.880364752747948e-08, "logits/chosen": -2.4743261337280273, "logits/rejected": -2.464456558227539, "logps/chosen": -449.85626220703125, "logps/rejected": -484.47998046875, "loss": 0.628, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.6029231548309326, "rewards/margins": 0.3753909170627594, "rewards/rejected": -1.9783141613006592, "step": 7090 }, { "epoch": 0.93, "eval_logits/chosen": -2.2360057830810547, "eval_logits/rejected": -2.2437164783477783, "eval_logps/chosen": -481.5184020996094, "eval_logps/rejected": -493.98126220703125, "eval_loss": 0.5998888611793518, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.4876903295516968, "eval_rewards/margins": 0.4410220980644226, "eval_rewards/rejected": -1.9287123680114746, "eval_runtime": 197.1071, "eval_samples_per_second": 10.147, "eval_steps_per_second": 5.073, "step": 7090 }, { "epoch": 0.93, "learning_rate": 7.598366508594245e-08, "logits/chosen": -2.3809356689453125, "logits/rejected": -2.394191026687622, "logps/chosen": -520.0443115234375, "logps/rejected": -556.1658935546875, "loss": 0.5261, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.521211862564087, "rewards/margins": 0.6119467616081238, "rewards/rejected": -2.1331584453582764, "step": 7100 }, { "epoch": 0.93, "eval_logits/chosen": -2.2363462448120117, "eval_logits/rejected": -2.2442967891693115, "eval_logps/chosen": -481.386474609375, "eval_logps/rejected": -493.82806396484375, "eval_loss": 0.5999638438224792, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.4863704442977905, "eval_rewards/margins": 0.44081002473831177, "eval_rewards/rejected": -1.9271804094314575, "eval_runtime": 197.3044, "eval_samples_per_second": 10.137, "eval_steps_per_second": 5.068, "step": 7100 }, { "epoch": 0.93, "learning_rate": 7.32142840750788e-08, "logits/chosen": -2.4369311332702637, "logits/rejected": -2.396646499633789, "logps/chosen": -509.541015625, "logps/rejected": -515.9998779296875, "loss": 0.5201, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.364022135734558, "rewards/margins": 0.608812153339386, "rewards/rejected": -1.9728343486785889, "step": 7110 }, { "epoch": 0.93, "eval_logits/chosen": -2.236448287963867, "eval_logits/rejected": -2.2443768978118896, "eval_logps/chosen": -481.1351318359375, "eval_logps/rejected": -493.5433654785156, "eval_loss": 0.6000708937644958, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.4838569164276123, "eval_rewards/margins": 0.44047674536705017, "eval_rewards/rejected": -1.9243335723876953, "eval_runtime": 197.1301, "eval_samples_per_second": 10.146, "eval_steps_per_second": 5.073, "step": 7110 }, { "epoch": 0.93, "learning_rate": 7.049556230581872e-08, "logits/chosen": -2.3801629543304443, "logits/rejected": -2.3011727333068848, "logps/chosen": -450.39837646484375, "logps/rejected": -455.0599060058594, "loss": 0.6504, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.612126111984253, "rewards/margins": 0.34414222836494446, "rewards/rejected": -1.9562686681747437, "step": 7120 }, { "epoch": 0.93, "eval_logits/chosen": -2.236114740371704, "eval_logits/rejected": -2.24397873878479, "eval_logps/chosen": -481.0938720703125, "eval_logps/rejected": -493.5276184082031, "eval_loss": 0.6000164151191711, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.4834445714950562, "eval_rewards/margins": 0.44073137640953064, "eval_rewards/rejected": -1.9241758584976196, "eval_runtime": 197.0056, "eval_samples_per_second": 10.152, "eval_steps_per_second": 5.076, "step": 7120 }, { "epoch": 0.93, "learning_rate": 6.782755653158085e-08, "logits/chosen": -2.495652437210083, "logits/rejected": -2.4827919006347656, "logps/chosen": -485.154296875, "logps/rejected": -492.5819396972656, "loss": 0.5956, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4519439935684204, "rewards/margins": 0.38830724358558655, "rewards/rejected": -1.8402513265609741, "step": 7130 }, { "epoch": 0.93, "eval_logits/chosen": -2.236171007156372, "eval_logits/rejected": -2.2441015243530273, "eval_logps/chosen": -480.94415283203125, "eval_logps/rejected": -493.3447570800781, "eval_loss": 0.6001508235931396, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.4819475412368774, "eval_rewards/margins": 0.4403998851776123, "eval_rewards/rejected": -1.9223475456237793, "eval_runtime": 196.9017, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 7130 }, { "epoch": 0.93, "learning_rate": 6.521032244708375e-08, "logits/chosen": -2.3476414680480957, "logits/rejected": -2.3627238273620605, "logps/chosen": -479.8387145996094, "logps/rejected": -507.6990661621094, "loss": 0.67, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.5138092041015625, "rewards/margins": 0.3506276309490204, "rewards/rejected": -1.8644367456436157, "step": 7140 }, { "epoch": 0.93, "eval_logits/chosen": -2.2358126640319824, "eval_logits/rejected": -2.2438180446624756, "eval_logps/chosen": -480.76470947265625, "eval_logps/rejected": -493.1570739746094, "eval_loss": 0.6001284718513489, "eval_rewards/accuracies": 0.6754999756813049, "eval_rewards/chosen": -1.48015296459198, "eval_rewards/margins": 0.4403176009654999, "eval_rewards/rejected": -1.9204705953598022, "eval_runtime": 196.9924, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.076, "step": 7140 }, { "epoch": 0.94, "learning_rate": 6.264391468718628e-08, "logits/chosen": -2.483029842376709, "logits/rejected": -2.448090076446533, "logps/chosen": -470.856201171875, "logps/rejected": -495.6634216308594, "loss": 0.5571, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.3123161792755127, "rewards/margins": 0.5451647043228149, "rewards/rejected": -1.857480764389038, "step": 7150 }, { "epoch": 0.94, "eval_logits/chosen": -2.2361068725585938, "eval_logits/rejected": -2.244180679321289, "eval_logps/chosen": -480.6026306152344, "eval_logps/rejected": -492.9624938964844, "eval_loss": 0.6001822352409363, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.4785321950912476, "eval_rewards/margins": 0.4399925172328949, "eval_rewards/rejected": -1.9185247421264648, "eval_runtime": 197.2989, "eval_samples_per_second": 10.137, "eval_steps_per_second": 5.068, "step": 7150 }, { "epoch": 0.94, "learning_rate": 6.012838682574462e-08, "logits/chosen": -2.557973861694336, "logits/rejected": -2.5231175422668457, "logps/chosen": -490.7843322753906, "logps/rejected": -462.07733154296875, "loss": 0.6067, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.563360571861267, "rewards/margins": 0.4491299092769623, "rewards/rejected": -2.0124905109405518, "step": 7160 }, { "epoch": 0.94, "eval_logits/chosen": -2.23598575592041, "eval_logits/rejected": -2.2441365718841553, "eval_logps/chosen": -480.674072265625, "eval_logps/rejected": -493.0469665527344, "eval_loss": 0.6001749038696289, "eval_rewards/accuracies": 0.6759999990463257, "eval_rewards/chosen": -1.4792464971542358, "eval_rewards/margins": 0.44012314081192017, "eval_rewards/rejected": -1.9193694591522217, "eval_runtime": 197.017, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.076, "step": 7160 }, { "epoch": 0.94, "learning_rate": 5.766379137449624e-08, "logits/chosen": -2.5023579597473145, "logits/rejected": -2.476633310317993, "logps/chosen": -426.63067626953125, "logps/rejected": -493.447265625, "loss": 0.567, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.3543007373809814, "rewards/margins": 0.5005929470062256, "rewards/rejected": -1.854893684387207, "step": 7170 }, { "epoch": 0.94, "eval_logits/chosen": -2.236705780029297, "eval_logits/rejected": -2.2447779178619385, "eval_logps/chosen": -480.6815185546875, "eval_logps/rejected": -493.0613708496094, "eval_loss": 0.6001842021942139, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.4793212413787842, "eval_rewards/margins": 0.44019201397895813, "eval_rewards/rejected": -1.91951322555542, "eval_runtime": 196.9448, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.078, "step": 7170 }, { "epoch": 0.94, "learning_rate": 5.525017978196295e-08, "logits/chosen": -2.523089647293091, "logits/rejected": -2.468512773513794, "logps/chosen": -509.1969299316406, "logps/rejected": -495.98358154296875, "loss": 0.5853, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4794842004776, "rewards/margins": 0.548646092414856, "rewards/rejected": -2.028130292892456, "step": 7180 }, { "epoch": 0.94, "eval_logits/chosen": -2.235506296157837, "eval_logits/rejected": -2.2435786724090576, "eval_logps/chosen": -480.6484069824219, "eval_logps/rejected": -493.06866455078125, "eval_loss": 0.5999549627304077, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4789897203445435, "eval_rewards/margins": 0.44059687852859497, "eval_rewards/rejected": -1.9195865392684937, "eval_runtime": 196.8598, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 7180 }, { "epoch": 0.94, "learning_rate": 5.288760243237545e-08, "logits/chosen": -2.4585039615631104, "logits/rejected": -2.401052951812744, "logps/chosen": -526.7545166015625, "logps/rejected": -510.090087890625, "loss": 0.5763, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.4466525316238403, "rewards/margins": 0.5509330630302429, "rewards/rejected": -1.9975858926773071, "step": 7190 }, { "epoch": 0.94, "eval_logits/chosen": -2.235445976257324, "eval_logits/rejected": -2.243375062942505, "eval_logps/chosen": -480.7488098144531, "eval_logps/rejected": -493.1495666503906, "eval_loss": 0.6000896096229553, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -1.4799941778182983, "eval_rewards/margins": 0.44040152430534363, "eval_rewards/rejected": -1.9203956127166748, "eval_runtime": 197.0246, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.076, "step": 7190 }, { "epoch": 0.94, "learning_rate": 5.0576108644623536e-08, "logits/chosen": -2.3365859985351562, "logits/rejected": -2.2916181087493896, "logps/chosen": -525.1251220703125, "logps/rejected": -482.224853515625, "loss": 0.6433, "rewards/accuracies": 0.625, "rewards/chosen": -1.5160186290740967, "rewards/margins": 0.3626102805137634, "rewards/rejected": -1.8786289691925049, "step": 7200 }, { "epoch": 0.94, "eval_logits/chosen": -2.235518455505371, "eval_logits/rejected": -2.2436070442199707, "eval_logps/chosen": -480.7719421386719, "eval_logps/rejected": -493.13995361328125, "eval_loss": 0.600199282169342, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.4802253246307373, "eval_rewards/margins": 0.4400743544101715, "eval_rewards/rejected": -1.9202996492385864, "eval_runtime": 197.1412, "eval_samples_per_second": 10.145, "eval_steps_per_second": 5.073, "step": 7200 }, { "epoch": 0.94, "learning_rate": 4.8315746671225296e-08, "logits/chosen": -2.4270944595336914, "logits/rejected": -2.376451015472412, "logps/chosen": -497.7478942871094, "logps/rejected": -515.0045776367188, "loss": 0.5246, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.2914670705795288, "rewards/margins": 0.5625424385070801, "rewards/rejected": -1.8540096282958984, "step": 7210 }, { "epoch": 0.94, "eval_logits/chosen": -2.2360446453094482, "eval_logits/rejected": -2.2440760135650635, "eval_logps/chosen": -480.86669921875, "eval_logps/rejected": -493.27886962890625, "eval_loss": 0.6001480221748352, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.4811723232269287, "eval_rewards/margins": 0.4405162036418915, "eval_rewards/rejected": -1.9216883182525635, "eval_runtime": 196.9347, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 7210 }, { "epoch": 0.94, "learning_rate": 4.6106563697320695e-08, "logits/chosen": -2.457677125930786, "logits/rejected": -2.44500470161438, "logps/chosen": -434.3646545410156, "logps/rejected": -441.17852783203125, "loss": 0.5472, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.2899720668792725, "rewards/margins": 0.6157953143119812, "rewards/rejected": -1.9057674407958984, "step": 7220 }, { "epoch": 0.94, "eval_logits/chosen": -2.2354533672332764, "eval_logits/rejected": -2.2435836791992188, "eval_logps/chosen": -480.8843688964844, "eval_logps/rejected": -493.26123046875, "eval_loss": 0.6003187894821167, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.4813494682312012, "eval_rewards/margins": 0.44016218185424805, "eval_rewards/rejected": -1.9215115308761597, "eval_runtime": 197.2506, "eval_samples_per_second": 10.139, "eval_steps_per_second": 5.07, "step": 7220 }, { "epoch": 0.95, "learning_rate": 4.394860583968624e-08, "logits/chosen": -2.489647150039673, "logits/rejected": -2.498183250427246, "logps/chosen": -397.114990234375, "logps/rejected": -462.450927734375, "loss": 0.6258, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.455381155014038, "rewards/margins": 0.3950461447238922, "rewards/rejected": -1.850427269935608, "step": 7230 }, { "epoch": 0.95, "eval_logits/chosen": -2.2355403900146484, "eval_logits/rejected": -2.2435503005981445, "eval_logps/chosen": -480.9859313964844, "eval_logps/rejected": -493.4081115722656, "eval_loss": 0.6001339554786682, "eval_rewards/accuracies": 0.671500027179718, "eval_rewards/chosen": -1.4823654890060425, "eval_rewards/margins": 0.4406152665615082, "eval_rewards/rejected": -1.9229806661605835, "eval_runtime": 196.8587, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 7230 }, { "epoch": 0.95, "learning_rate": 4.1841918145771874e-08, "logits/chosen": -2.379164218902588, "logits/rejected": -2.3571412563323975, "logps/chosen": -482.9966735839844, "logps/rejected": -512.3907470703125, "loss": 0.5519, "rewards/accuracies": 0.75, "rewards/chosen": -1.4066792726516724, "rewards/margins": 0.5344252586364746, "rewards/rejected": -1.941104531288147, "step": 7240 }, { "epoch": 0.95, "eval_logits/chosen": -2.2356574535369873, "eval_logits/rejected": -2.243659496307373, "eval_logps/chosen": -480.8147888183594, "eval_logps/rejected": -493.2166748046875, "eval_loss": 0.600143313407898, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.4806538820266724, "eval_rewards/margins": 0.4404126703739166, "eval_rewards/rejected": -1.921066403388977, "eval_runtime": 197.1276, "eval_samples_per_second": 10.146, "eval_steps_per_second": 5.073, "step": 7240 }, { "epoch": 0.95, "learning_rate": 3.978654459276088e-08, "logits/chosen": -2.5380361080169678, "logits/rejected": -2.518141269683838, "logps/chosen": -526.7445068359375, "logps/rejected": -505.240234375, "loss": 0.5803, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4389846324920654, "rewards/margins": 0.531572163105011, "rewards/rejected": -1.9705572128295898, "step": 7250 }, { "epoch": 0.95, "eval_logits/chosen": -2.235908269882202, "eval_logits/rejected": -2.2438931465148926, "eval_logps/chosen": -480.80810546875, "eval_logps/rejected": -493.21368408203125, "eval_loss": 0.600168764591217, "eval_rewards/accuracies": 0.6754999756813049, "eval_rewards/chosen": -1.4805866479873657, "eval_rewards/margins": 0.4404502213001251, "eval_rewards/rejected": -1.921036958694458, "eval_runtime": 197.0241, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.076, "step": 7250 }, { "epoch": 0.95, "learning_rate": 3.778252808665284e-08, "logits/chosen": -2.5763096809387207, "logits/rejected": -2.581481456756592, "logps/chosen": -542.89453125, "logps/rejected": -504.26971435546875, "loss": 0.566, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4922627210617065, "rewards/margins": 0.5086614489555359, "rewards/rejected": -2.0009243488311768, "step": 7260 }, { "epoch": 0.95, "eval_logits/chosen": -2.235844612121582, "eval_logits/rejected": -2.2437777519226074, "eval_logps/chosen": -480.9935607910156, "eval_logps/rejected": -493.4316101074219, "eval_loss": 0.6000894904136658, "eval_rewards/accuracies": 0.6759999990463257, "eval_rewards/chosen": -1.4824414253234863, "eval_rewards/margins": 0.4407746493816376, "eval_rewards/rejected": -1.9232161045074463, "eval_runtime": 196.9012, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.079, "step": 7260 }, { "epoch": 0.95, "learning_rate": 3.5829910461366023e-08, "logits/chosen": -2.401991367340088, "logits/rejected": -2.417039632797241, "logps/chosen": -438.27191162109375, "logps/rejected": -485.9276428222656, "loss": 0.5685, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.3814996480941772, "rewards/margins": 0.601586639881134, "rewards/rejected": -1.983086347579956, "step": 7270 }, { "epoch": 0.95, "eval_logits/chosen": -2.234957218170166, "eval_logits/rejected": -2.242936611175537, "eval_logps/chosen": -481.03302001953125, "eval_logps/rejected": -493.4700622558594, "eval_loss": 0.6001612544059753, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.482836127281189, "eval_rewards/margins": 0.44076380133628845, "eval_rewards/rejected": -1.9235999584197998, "eval_runtime": 196.9569, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 7270 }, { "epoch": 0.95, "learning_rate": 3.39287324778656e-08, "logits/chosen": -2.5495078563690186, "logits/rejected": -2.5458648204803467, "logps/chosen": -551.0210571289062, "logps/rejected": -540.4073486328125, "loss": 0.6324, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.5627120733261108, "rewards/margins": 0.4206581115722656, "rewards/rejected": -1.9833701848983765, "step": 7280 }, { "epoch": 0.95, "eval_logits/chosen": -2.2355172634124756, "eval_logits/rejected": -2.2434380054473877, "eval_logps/chosen": -481.1297607421875, "eval_logps/rejected": -493.6228332519531, "eval_loss": 0.6000157594680786, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4838035106658936, "eval_rewards/margins": 0.44132480025291443, "eval_rewards/rejected": -1.9251282215118408, "eval_runtime": 196.9853, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 7280 }, { "epoch": 0.95, "learning_rate": 3.207903382331262e-08, "logits/chosen": -2.4150776863098145, "logits/rejected": -2.4447875022888184, "logps/chosen": -505.7635192871094, "logps/rejected": -486.01513671875, "loss": 0.6182, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.3768532276153564, "rewards/margins": 0.3623526394367218, "rewards/rejected": -1.7392059564590454, "step": 7290 }, { "epoch": 0.95, "eval_logits/chosen": -2.235156297683716, "eval_logits/rejected": -2.2431724071502686, "eval_logps/chosen": -481.0028076171875, "eval_logps/rejected": -493.4009704589844, "eval_loss": 0.600363552570343, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.482533574104309, "eval_rewards/margins": 0.4403752386569977, "eval_rewards/rejected": -1.9229090213775635, "eval_runtime": 197.2199, "eval_samples_per_second": 10.141, "eval_steps_per_second": 5.07, "step": 7290 }, { "epoch": 0.96, "learning_rate": 3.028085311023443e-08, "logits/chosen": -2.3501362800598145, "logits/rejected": -2.3338279724121094, "logps/chosen": -482.4646911621094, "logps/rejected": -484.25885009765625, "loss": 0.5617, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.3721768856048584, "rewards/margins": 0.5388258695602417, "rewards/rejected": -1.9110028743743896, "step": 7300 }, { "epoch": 0.96, "eval_logits/chosen": -2.2347323894500732, "eval_logits/rejected": -2.242525339126587, "eval_logps/chosen": -481.16748046875, "eval_logps/rejected": -493.6541442871094, "eval_loss": 0.6000457406044006, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -1.4841809272766113, "eval_rewards/margins": 0.4412601888179779, "eval_rewards/rejected": -1.925441026687622, "eval_runtime": 197.1805, "eval_samples_per_second": 10.143, "eval_steps_per_second": 5.071, "step": 7300 }, { "epoch": 0.96, "learning_rate": 2.8534227875720576e-08, "logits/chosen": -2.493821620941162, "logits/rejected": -2.4801414012908936, "logps/chosen": -466.4181213378906, "logps/rejected": -507.5314025878906, "loss": 0.5804, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4337350130081177, "rewards/margins": 0.4854932427406311, "rewards/rejected": -1.9192283153533936, "step": 7310 }, { "epoch": 0.96, "eval_logits/chosen": -2.2344577312469482, "eval_logits/rejected": -2.2422938346862793, "eval_logps/chosen": -481.3323669433594, "eval_logps/rejected": -493.8113708496094, "eval_loss": 0.600138783454895, "eval_rewards/accuracies": 0.6754999756813049, "eval_rewards/chosen": -1.4858298301696777, "eval_rewards/margins": 0.44118383526802063, "eval_rewards/rejected": -1.927013635635376, "eval_runtime": 196.9791, "eval_samples_per_second": 10.153, "eval_steps_per_second": 5.077, "step": 7310 }, { "epoch": 0.96, "learning_rate": 2.683919458063705e-08, "logits/chosen": -2.486636161804199, "logits/rejected": -2.414386749267578, "logps/chosen": -405.9773864746094, "logps/rejected": -394.66607666015625, "loss": 0.5918, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.4356229305267334, "rewards/margins": 0.4635559916496277, "rewards/rejected": -1.8991791009902954, "step": 7320 }, { "epoch": 0.96, "eval_logits/chosen": -2.2344415187835693, "eval_logits/rejected": -2.242302656173706, "eval_logps/chosen": -481.2685546875, "eval_logps/rejected": -493.75445556640625, "eval_loss": 0.6001518368721008, "eval_rewards/accuracies": 0.6754999756813049, "eval_rewards/chosen": -1.4851912260055542, "eval_rewards/margins": 0.4412528872489929, "eval_rewards/rejected": -1.9264440536499023, "eval_runtime": 197.6057, "eval_samples_per_second": 10.121, "eval_steps_per_second": 5.061, "step": 7320 }, { "epoch": 0.96, "learning_rate": 2.5195788608866345e-08, "logits/chosen": -2.381263017654419, "logits/rejected": -2.317131280899048, "logps/chosen": -571.0789794921875, "logps/rejected": -538.0128784179688, "loss": 0.5686, "rewards/accuracies": 0.6875, "rewards/chosen": -1.586227297782898, "rewards/margins": 0.5877612233161926, "rewards/rejected": -2.1739885807037354, "step": 7330 }, { "epoch": 0.96, "eval_logits/chosen": -2.2345898151397705, "eval_logits/rejected": -2.242624282836914, "eval_logps/chosen": -481.3023986816406, "eval_logps/rejected": -493.7621154785156, "eval_loss": 0.6001543998718262, "eval_rewards/accuracies": 0.6754999756813049, "eval_rewards/chosen": -1.4855300188064575, "eval_rewards/margins": 0.4409913420677185, "eval_rewards/rejected": -1.9265215396881104, "eval_runtime": 197.0845, "eval_samples_per_second": 10.148, "eval_steps_per_second": 5.074, "step": 7330 }, { "epoch": 0.96, "learning_rate": 2.3604044266569426e-08, "logits/chosen": -2.4356443881988525, "logits/rejected": -2.3660850524902344, "logps/chosen": -499.91949462890625, "logps/rejected": -484.0068359375, "loss": 0.6209, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.5768749713897705, "rewards/margins": 0.38860636949539185, "rewards/rejected": -1.9654814004898071, "step": 7340 }, { "epoch": 0.96, "eval_logits/chosen": -2.2345218658447266, "eval_logits/rejected": -2.2423171997070312, "eval_logps/chosen": -481.2668762207031, "eval_logps/rejected": -493.7497863769531, "eval_loss": 0.6001455783843994, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.485174536705017, "eval_rewards/margins": 0.4412229061126709, "eval_rewards/rejected": -1.926397442817688, "eval_runtime": 196.7867, "eval_samples_per_second": 10.163, "eval_steps_per_second": 5.082, "step": 7340 }, { "epoch": 0.96, "learning_rate": 2.2063994781468256e-08, "logits/chosen": -2.3938355445861816, "logits/rejected": -2.4135689735412598, "logps/chosen": -474.46148681640625, "logps/rejected": -478.35601806640625, "loss": 0.6103, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.480282187461853, "rewards/margins": 0.47738590836524963, "rewards/rejected": -1.9576680660247803, "step": 7350 }, { "epoch": 0.96, "eval_logits/chosen": -2.234881639480591, "eval_logits/rejected": -2.2427618503570557, "eval_logps/chosen": -481.1435852050781, "eval_logps/rejected": -493.56268310546875, "eval_loss": 0.600322961807251, "eval_rewards/accuracies": 0.6765000224113464, "eval_rewards/chosen": -1.4839422702789307, "eval_rewards/margins": 0.44058436155319214, "eval_rewards/rejected": -1.9245266914367676, "eval_runtime": 196.7921, "eval_samples_per_second": 10.163, "eval_steps_per_second": 5.082, "step": 7350 }, { "epoch": 0.96, "learning_rate": 2.057567230215246e-08, "logits/chosen": -2.5192952156066895, "logits/rejected": -2.5412585735321045, "logps/chosen": -486.6336364746094, "logps/rejected": -526.0662841796875, "loss": 0.6426, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.5905240774154663, "rewards/margins": 0.31872352957725525, "rewards/rejected": -1.9092477560043335, "step": 7360 }, { "epoch": 0.96, "eval_logits/chosen": -2.2351341247558594, "eval_logits/rejected": -2.2430951595306396, "eval_logps/chosen": -481.04461669921875, "eval_logps/rejected": -493.49676513671875, "eval_loss": 0.6000579595565796, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4829521179199219, "eval_rewards/margins": 0.44091513752937317, "eval_rewards/rejected": -1.9238673448562622, "eval_runtime": 196.971, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 7360 }, { "epoch": 0.96, "learning_rate": 1.9139107897409303e-08, "logits/chosen": -2.3607892990112305, "logits/rejected": -2.328470468521118, "logps/chosen": -502.33673095703125, "logps/rejected": -484.7545471191406, "loss": 0.5438, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4793694019317627, "rewards/margins": 0.5975691080093384, "rewards/rejected": -2.0769386291503906, "step": 7370 }, { "epoch": 0.96, "eval_logits/chosen": -2.2346956729888916, "eval_logits/rejected": -2.2427361011505127, "eval_logps/chosen": -481.18170166015625, "eval_logps/rejected": -493.6324768066406, "eval_loss": 0.600149393081665, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -1.4843229055404663, "eval_rewards/margins": 0.4409013092517853, "eval_rewards/rejected": -1.9252241849899292, "eval_runtime": 196.8729, "eval_samples_per_second": 10.159, "eval_steps_per_second": 5.079, "step": 7370 }, { "epoch": 0.97, "learning_rate": 1.7754331555573656e-08, "logits/chosen": -2.5656070709228516, "logits/rejected": -2.546877384185791, "logps/chosen": -496.11834716796875, "logps/rejected": -562.813232421875, "loss": 0.6082, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4974877834320068, "rewards/margins": 0.4004742503166199, "rewards/rejected": -1.897962212562561, "step": 7380 }, { "epoch": 0.97, "eval_logits/chosen": -2.235090970993042, "eval_logits/rejected": -2.2430014610290527, "eval_logps/chosen": -480.996826171875, "eval_logps/rejected": -493.45916748046875, "eval_loss": 0.6000385880470276, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4824742078781128, "eval_rewards/margins": 0.44101738929748535, "eval_rewards/rejected": -1.9234915971755981, "eval_runtime": 196.9341, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 7380 }, { "epoch": 0.97, "learning_rate": 1.642137218390294e-08, "logits/chosen": -2.5074877738952637, "logits/rejected": -2.4454264640808105, "logps/chosen": -510.3954162597656, "logps/rejected": -490.68426513671875, "loss": 0.6119, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5543279647827148, "rewards/margins": 0.4863724708557129, "rewards/rejected": -2.0407004356384277, "step": 7390 }, { "epoch": 0.97, "eval_logits/chosen": -2.235023260116577, "eval_logits/rejected": -2.2430419921875, "eval_logps/chosen": -480.9540710449219, "eval_logps/rejected": -493.37200927734375, "eval_loss": 0.6001395583152771, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.4820466041564941, "eval_rewards/margins": 0.44057348370552063, "eval_rewards/rejected": -1.9226198196411133, "eval_runtime": 197.0167, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.076, "step": 7390 }, { "epoch": 0.97, "learning_rate": 1.514025760797344e-08, "logits/chosen": -2.5765981674194336, "logits/rejected": -2.5297365188598633, "logps/chosen": -520.4789428710938, "logps/rejected": -500.82781982421875, "loss": 0.5542, "rewards/accuracies": 0.6875, "rewards/chosen": -1.3169505596160889, "rewards/margins": 0.6009668111801147, "rewards/rejected": -1.917917251586914, "step": 7400 }, { "epoch": 0.97, "eval_logits/chosen": -2.2352027893066406, "eval_logits/rejected": -2.2430360317230225, "eval_logps/chosen": -480.98291015625, "eval_logps/rejected": -493.39874267578125, "eval_loss": 0.6002033948898315, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4823347330093384, "eval_rewards/margins": 0.44055286049842834, "eval_rewards/rejected": -1.9228876829147339, "eval_runtime": 196.9532, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.077, "step": 7400 }, { "epoch": 0.97, "learning_rate": 1.3911014571098835e-08, "logits/chosen": -2.4495015144348145, "logits/rejected": -2.454916477203369, "logps/chosen": -452.8680725097656, "logps/rejected": -499.41082763671875, "loss": 0.6158, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4365514516830444, "rewards/margins": 0.3518769443035126, "rewards/rejected": -1.7884283065795898, "step": 7410 }, { "epoch": 0.97, "eval_logits/chosen": -2.2354750633239746, "eval_logits/rejected": -2.2434208393096924, "eval_logps/chosen": -480.9961853027344, "eval_logps/rejected": -493.46746826171875, "eval_loss": 0.5999510288238525, "eval_rewards/accuracies": 0.6754999756813049, "eval_rewards/chosen": -1.4824678897857666, "eval_rewards/margins": 0.4411066174507141, "eval_rewards/rejected": -1.923574686050415, "eval_runtime": 197.1394, "eval_samples_per_second": 10.145, "eval_steps_per_second": 5.073, "step": 7410 }, { "epoch": 0.97, "learning_rate": 1.2733668733773685e-08, "logits/chosen": -2.4694085121154785, "logits/rejected": -2.442884922027588, "logps/chosen": -470.5113220214844, "logps/rejected": -481.81854248046875, "loss": 0.5374, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4029333591461182, "rewards/margins": 0.563439667224884, "rewards/rejected": -1.9663728475570679, "step": 7420 }, { "epoch": 0.97, "eval_logits/chosen": -2.2354135513305664, "eval_logits/rejected": -2.2434794902801514, "eval_logps/chosen": -481.0262145996094, "eval_logps/rejected": -493.44000244140625, "eval_loss": 0.6002059578895569, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4827677011489868, "eval_rewards/margins": 0.44053229689598083, "eval_rewards/rejected": -1.923299789428711, "eval_runtime": 197.0352, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 7420 }, { "epoch": 0.97, "learning_rate": 1.160824467313526e-08, "logits/chosen": -2.4775195121765137, "logits/rejected": -2.447704792022705, "logps/chosen": -536.0025634765625, "logps/rejected": -557.3277587890625, "loss": 0.5666, "rewards/accuracies": 0.6875, "rewards/chosen": -1.443741798400879, "rewards/margins": 0.5521557331085205, "rewards/rejected": -1.9958975315093994, "step": 7430 }, { "epoch": 0.97, "eval_logits/chosen": -2.2347254753112793, "eval_logits/rejected": -2.242621898651123, "eval_logps/chosen": -481.0495910644531, "eval_logps/rejected": -493.4919738769531, "eval_loss": 0.600189745426178, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -1.4830018281936646, "eval_rewards/margins": 0.44081735610961914, "eval_rewards/rejected": -1.9238191843032837, "eval_runtime": 197.1193, "eval_samples_per_second": 10.146, "eval_steps_per_second": 5.073, "step": 7430 }, { "epoch": 0.97, "learning_rate": 1.0534765882453113e-08, "logits/chosen": -2.5495553016662598, "logits/rejected": -2.5306897163391113, "logps/chosen": -444.939453125, "logps/rejected": -473.026123046875, "loss": 0.5652, "rewards/accuracies": 0.6875, "rewards/chosen": -1.3965262174606323, "rewards/margins": 0.45321035385131836, "rewards/rejected": -1.8497365713119507, "step": 7440 }, { "epoch": 0.97, "eval_logits/chosen": -2.2343382835388184, "eval_logits/rejected": -2.2423436641693115, "eval_logps/chosen": -480.95867919921875, "eval_logps/rejected": -493.39239501953125, "eval_loss": 0.6001228094100952, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.4820924997329712, "eval_rewards/margins": 0.4407311975955963, "eval_rewards/rejected": -1.9228236675262451, "eval_runtime": 197.1931, "eval_samples_per_second": 10.142, "eval_steps_per_second": 5.071, "step": 7440 }, { "epoch": 0.97, "learning_rate": 9.513254770636138e-09, "logits/chosen": -2.504429340362549, "logits/rejected": -2.4770166873931885, "logps/chosen": -543.8363037109375, "logps/rejected": -550.0518188476562, "loss": 0.6496, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.6499207019805908, "rewards/margins": 0.3239971101284027, "rewards/rejected": -1.9739177227020264, "step": 7450 }, { "epoch": 0.97, "eval_logits/chosen": -2.2349491119384766, "eval_logits/rejected": -2.242962598800659, "eval_logps/chosen": -480.9340515136719, "eval_logps/rejected": -493.3950500488281, "eval_loss": 0.6000087857246399, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -1.4818464517593384, "eval_rewards/margins": 0.4410039186477661, "eval_rewards/rejected": -1.922850489616394, "eval_runtime": 196.8291, "eval_samples_per_second": 10.161, "eval_steps_per_second": 5.081, "step": 7450 }, { "epoch": 0.98, "learning_rate": 8.543732661767113e-09, "logits/chosen": -2.437833786010742, "logits/rejected": -2.4614272117614746, "logps/chosen": -493.88262939453125, "logps/rejected": -533.1986083984375, "loss": 0.6299, "rewards/accuracies": 0.625, "rewards/chosen": -1.4723308086395264, "rewards/margins": 0.3506353795528412, "rewards/rejected": -1.8229663372039795, "step": 7460 }, { "epoch": 0.98, "eval_logits/chosen": -2.234743356704712, "eval_logits/rejected": -2.2426867485046387, "eval_logps/chosen": -481.0391845703125, "eval_logps/rejected": -493.4858093261719, "eval_loss": 0.5999827980995178, "eval_rewards/accuracies": 0.671500027179718, "eval_rewards/chosen": -1.4828983545303345, "eval_rewards/margins": 0.44085952639579773, "eval_rewards/rejected": -1.923757791519165, "eval_runtime": 197.1407, "eval_samples_per_second": 10.145, "eval_steps_per_second": 5.073, "step": 7460 }, { "epoch": 0.98, "learning_rate": 7.626219794655553e-09, "logits/chosen": -2.424541473388672, "logits/rejected": -2.410937547683716, "logps/chosen": -449.8960876464844, "logps/rejected": -487.70965576171875, "loss": 0.5913, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.3779528141021729, "rewards/margins": 0.4145236015319824, "rewards/rejected": -1.7924764156341553, "step": 7470 }, { "epoch": 0.98, "eval_logits/chosen": -2.2347500324249268, "eval_logits/rejected": -2.2425975799560547, "eval_logps/chosen": -481.0333251953125, "eval_logps/rejected": -493.48687744140625, "eval_loss": 0.6000844836235046, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.4828383922576904, "eval_rewards/margins": 0.44092994928359985, "eval_rewards/rejected": -1.9237682819366455, "eval_runtime": 197.0357, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 7470 }, { "epoch": 0.98, "learning_rate": 6.7607353224163896e-09, "logits/chosen": -2.494070053100586, "logits/rejected": -2.460822582244873, "logps/chosen": -476.69012451171875, "logps/rejected": -475.34375, "loss": 0.5809, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.3975374698638916, "rewards/margins": 0.47304433584213257, "rewards/rejected": -1.870581865310669, "step": 7480 }, { "epoch": 0.98, "eval_logits/chosen": -2.235112190246582, "eval_logits/rejected": -2.243116855621338, "eval_logps/chosen": -481.0243835449219, "eval_logps/rejected": -493.4410705566406, "eval_loss": 0.6001359224319458, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.482749342918396, "eval_rewards/margins": 0.4405609965324402, "eval_rewards/rejected": -1.923310399055481, "eval_runtime": 197.066, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.074, "step": 7480 }, { "epoch": 0.98, "learning_rate": 5.947297312070554e-09, "logits/chosen": -2.3596110343933105, "logits/rejected": -2.344242811203003, "logps/chosen": -517.79931640625, "logps/rejected": -497.13067626953125, "loss": 0.522, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.4211236238479614, "rewards/margins": 0.6752224564552307, "rewards/rejected": -2.096346139907837, "step": 7490 }, { "epoch": 0.98, "eval_logits/chosen": -2.2345526218414307, "eval_logits/rejected": -2.2424867153167725, "eval_logps/chosen": -481.04962158203125, "eval_logps/rejected": -493.4761657714844, "eval_loss": 0.6002518534660339, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.483001947402954, "eval_rewards/margins": 0.4406592547893524, "eval_rewards/rejected": -1.923661231994629, "eval_runtime": 197.026, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 7490 }, { "epoch": 0.98, "learning_rate": 5.185922744166128e-09, "logits/chosen": -2.4216926097869873, "logits/rejected": -2.4630672931671143, "logps/chosen": -484.6437072753906, "logps/rejected": -527.0128784179688, "loss": 0.4985, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.3571288585662842, "rewards/margins": 0.7009686827659607, "rewards/rejected": -2.0580973625183105, "step": 7500 }, { "epoch": 0.98, "eval_logits/chosen": -2.2345950603485107, "eval_logits/rejected": -2.2425150871276855, "eval_logps/chosen": -481.0412292480469, "eval_logps/rejected": -493.51708984375, "eval_loss": 0.6000384092330933, "eval_rewards/accuracies": 0.6754999756813049, "eval_rewards/chosen": -1.4829176664352417, "eval_rewards/margins": 0.44115301966667175, "eval_rewards/rejected": -1.924070954322815, "eval_runtime": 196.6456, "eval_samples_per_second": 10.171, "eval_steps_per_second": 5.085, "step": 7500 }, { "epoch": 0.98, "learning_rate": 4.476627512425558e-09, "logits/chosen": -2.4267430305480957, "logits/rejected": -2.4429757595062256, "logps/chosen": -481.5536193847656, "logps/rejected": -499.205078125, "loss": 0.5986, "rewards/accuracies": 0.625, "rewards/chosen": -1.4090001583099365, "rewards/margins": 0.3903266489505768, "rewards/rejected": -1.7993266582489014, "step": 7510 }, { "epoch": 0.98, "eval_logits/chosen": -2.234553813934326, "eval_logits/rejected": -2.2424240112304688, "eval_logps/chosen": -481.0640869140625, "eval_logps/rejected": -493.5656433105469, "eval_loss": 0.5998890399932861, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.4831470251083374, "eval_rewards/margins": 0.4414092004299164, "eval_rewards/rejected": -1.9245561361312866, "eval_runtime": 196.947, "eval_samples_per_second": 10.155, "eval_steps_per_second": 5.078, "step": 7510 }, { "epoch": 0.98, "learning_rate": 3.819426423412875e-09, "logits/chosen": -2.4812378883361816, "logits/rejected": -2.4551587104797363, "logps/chosen": -514.4886474609375, "logps/rejected": -527.8831787109375, "loss": 0.5748, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.5342094898223877, "rewards/margins": 0.5676885843276978, "rewards/rejected": -2.101898193359375, "step": 7520 }, { "epoch": 0.98, "eval_logits/chosen": -2.235311985015869, "eval_logits/rejected": -2.243098497390747, "eval_logps/chosen": -481.0576171875, "eval_logps/rejected": -493.5203857421875, "eval_loss": 0.6000725030899048, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4830819368362427, "eval_rewards/margins": 0.44102197885513306, "eval_rewards/rejected": -1.9241037368774414, "eval_runtime": 197.0258, "eval_samples_per_second": 10.151, "eval_steps_per_second": 5.075, "step": 7520 }, { "epoch": 0.99, "learning_rate": 3.2143331962256053e-09, "logits/chosen": -2.4706759452819824, "logits/rejected": -2.4405550956726074, "logps/chosen": -497.95977783203125, "logps/rejected": -523.1436157226562, "loss": 0.6262, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.4370882511138916, "rewards/margins": 0.4015069901943207, "rewards/rejected": -1.8385951519012451, "step": 7530 }, { "epoch": 0.99, "eval_logits/chosen": -2.234773874282837, "eval_logits/rejected": -2.242658853530884, "eval_logps/chosen": -481.1151428222656, "eval_logps/rejected": -493.6004333496094, "eval_loss": 0.5999842286109924, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -1.4836574792861938, "eval_rewards/margins": 0.4412464201450348, "eval_rewards/rejected": -1.9249041080474854, "eval_runtime": 197.1165, "eval_samples_per_second": 10.146, "eval_steps_per_second": 5.073, "step": 7530 }, { "epoch": 0.99, "learning_rate": 2.6613604622066635e-09, "logits/chosen": -2.541171073913574, "logits/rejected": -2.5328097343444824, "logps/chosen": -455.81689453125, "logps/rejected": -507.7245178222656, "loss": 0.5998, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.3390320539474487, "rewards/margins": 0.43062907457351685, "rewards/rejected": -1.7696613073349, "step": 7540 }, { "epoch": 0.99, "eval_logits/chosen": -2.235093116760254, "eval_logits/rejected": -2.2430434226989746, "eval_logps/chosen": -481.0205993652344, "eval_logps/rejected": -493.4674377441406, "eval_loss": 0.6002621054649353, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.4827115535736084, "eval_rewards/margins": 0.44086259603500366, "eval_rewards/rejected": -1.9235742092132568, "eval_runtime": 197.3737, "eval_samples_per_second": 10.133, "eval_steps_per_second": 5.067, "step": 7540 }, { "epoch": 0.99, "learning_rate": 2.1605197646826228e-09, "logits/chosen": -2.346137523651123, "logits/rejected": -2.3369574546813965, "logps/chosen": -441.90081787109375, "logps/rejected": -449.46826171875, "loss": 0.5577, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.3383251428604126, "rewards/margins": 0.5153234601020813, "rewards/rejected": -1.8536484241485596, "step": 7550 }, { "epoch": 0.99, "eval_logits/chosen": -2.2347373962402344, "eval_logits/rejected": -2.2426373958587646, "eval_logps/chosen": -481.1553955078125, "eval_logps/rejected": -493.6079406738281, "eval_loss": 0.6001291275024414, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.4840598106384277, "eval_rewards/margins": 0.44091925024986267, "eval_rewards/rejected": -1.9249789714813232, "eval_runtime": 196.9374, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 7550 }, { "epoch": 0.99, "learning_rate": 1.711821558721405e-09, "logits/chosen": -2.4623870849609375, "logits/rejected": -2.449855327606201, "logps/chosen": -520.6915283203125, "logps/rejected": -494.63409423828125, "loss": 0.5462, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4562956094741821, "rewards/margins": 0.49862104654312134, "rewards/rejected": -1.9549165964126587, "step": 7560 }, { "epoch": 0.99, "eval_logits/chosen": -2.23518705368042, "eval_logits/rejected": -2.2431421279907227, "eval_logps/chosen": -481.0464172363281, "eval_logps/rejected": -493.4737854003906, "eval_loss": 0.6001744270324707, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4829697608947754, "eval_rewards/margins": 0.44066765904426575, "eval_rewards/rejected": -1.9236375093460083, "eval_runtime": 197.0436, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 7560 }, { "epoch": 0.99, "learning_rate": 1.3152752109149569e-09, "logits/chosen": -2.4634616374969482, "logits/rejected": -2.4458584785461426, "logps/chosen": -497.2911071777344, "logps/rejected": -506.8922424316406, "loss": 0.6308, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.5672760009765625, "rewards/margins": 0.35916125774383545, "rewards/rejected": -1.9264371395111084, "step": 7570 }, { "epoch": 0.99, "eval_logits/chosen": -2.2348592281341553, "eval_logits/rejected": -2.2428770065307617, "eval_logps/chosen": -480.9827880859375, "eval_logps/rejected": -493.4491882324219, "eval_loss": 0.6000152230262756, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -1.4823333024978638, "eval_rewards/margins": 0.44105857610702515, "eval_rewards/rejected": -1.9233920574188232, "eval_runtime": 197.0461, "eval_samples_per_second": 10.15, "eval_steps_per_second": 5.075, "step": 7570 }, { "epoch": 0.99, "learning_rate": 9.708889991830173e-10, "logits/chosen": -2.4818179607391357, "logits/rejected": -2.464740037918091, "logps/chosen": -490.53399658203125, "logps/rejected": -458.87774658203125, "loss": 0.5767, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.4643405675888062, "rewards/margins": 0.48608309030532837, "rewards/rejected": -1.9504238367080688, "step": 7580 }, { "epoch": 0.99, "eval_logits/chosen": -2.235227346420288, "eval_logits/rejected": -2.2431695461273193, "eval_logps/chosen": -480.998779296875, "eval_logps/rejected": -493.45257568359375, "eval_loss": 0.6000584959983826, "eval_rewards/accuracies": 0.6759999990463257, "eval_rewards/chosen": -1.48249351978302, "eval_rewards/margins": 0.4409320652484894, "eval_rewards/rejected": -1.9234256744384766, "eval_runtime": 196.8116, "eval_samples_per_second": 10.162, "eval_steps_per_second": 5.081, "step": 7580 }, { "epoch": 0.99, "learning_rate": 6.786701125999218e-10, "logits/chosen": -2.364657163619995, "logits/rejected": -2.3832263946533203, "logps/chosen": -484.3373107910156, "logps/rejected": -493.5921325683594, "loss": 0.7282, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -1.6983461380004883, "rewards/margins": 0.2650797963142395, "rewards/rejected": -1.9634259939193726, "step": 7590 }, { "epoch": 0.99, "eval_logits/chosen": -2.2349560260772705, "eval_logits/rejected": -2.242851495742798, "eval_logps/chosen": -480.9801940917969, "eval_logps/rejected": -493.40118408203125, "eval_loss": 0.6001663208007812, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -1.4823077917099, "eval_rewards/margins": 0.4406040608882904, "eval_rewards/rejected": -1.9229116439819336, "eval_runtime": 196.9615, "eval_samples_per_second": 10.154, "eval_steps_per_second": 5.077, "step": 7590 }, { "epoch": 0.99, "learning_rate": 4.3862465124638873e-10, "logits/chosen": -2.3418660163879395, "logits/rejected": -2.384479522705078, "logps/chosen": -473.34197998046875, "logps/rejected": -479.49615478515625, "loss": 0.6687, "rewards/accuracies": 0.625, "rewards/chosen": -1.5078928470611572, "rewards/margins": 0.28139907121658325, "rewards/rejected": -1.7892920970916748, "step": 7600 }, { "epoch": 0.99, "eval_logits/chosen": -2.234585762023926, "eval_logits/rejected": -2.2425272464752197, "eval_logps/chosen": -481.1004333496094, "eval_logps/rejected": -493.569091796875, "eval_loss": 0.6000926494598389, "eval_rewards/accuracies": 0.6754999756813049, "eval_rewards/chosen": -1.48350989818573, "eval_rewards/margins": 0.4410809576511383, "eval_rewards/rejected": -1.924590826034546, "eval_runtime": 197.1425, "eval_samples_per_second": 10.145, "eval_steps_per_second": 5.072, "step": 7600 }, { "epoch": 1.0, "learning_rate": 2.507576260799005e-10, "logits/chosen": -2.5632288455963135, "logits/rejected": -2.518597364425659, "logps/chosen": -519.2454833984375, "logps/rejected": -549.087158203125, "loss": 0.5566, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4352588653564453, "rewards/margins": 0.5633091926574707, "rewards/rejected": -1.9985681772232056, "step": 7610 }, { "epoch": 1.0, "eval_logits/chosen": -2.234978437423706, "eval_logits/rejected": -2.242854118347168, "eval_logps/chosen": -481.08746337890625, "eval_logps/rejected": -493.5091857910156, "eval_loss": 0.6003447771072388, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4833803176879883, "eval_rewards/margins": 0.44061169028282166, "eval_rewards/rejected": -1.9239921569824219, "eval_runtime": 196.9114, "eval_samples_per_second": 10.157, "eval_steps_per_second": 5.078, "step": 7610 }, { "epoch": 1.0, "learning_rate": 1.1507295883145253e-10, "logits/chosen": -2.475334644317627, "logits/rejected": -2.509917736053467, "logps/chosen": -488.1448669433594, "logps/rejected": -532.739501953125, "loss": 0.5543, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.3513810634613037, "rewards/margins": 0.5696924924850464, "rewards/rejected": -1.92107355594635, "step": 7620 }, { "epoch": 1.0, "eval_logits/chosen": -2.235002040863037, "eval_logits/rejected": -2.2428789138793945, "eval_logps/chosen": -481.00299072265625, "eval_logps/rejected": -493.47991943359375, "eval_loss": 0.6000400185585022, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -1.4825358390808105, "eval_rewards/margins": 0.4411628842353821, "eval_rewards/rejected": -1.9236990213394165, "eval_runtime": 197.0703, "eval_samples_per_second": 10.149, "eval_steps_per_second": 5.074, "step": 7620 }, { "epoch": 1.0, "learning_rate": 3.1573481923952156e-11, "logits/chosen": -2.420581817626953, "logits/rejected": -2.3721389770507812, "logps/chosen": -537.7681884765625, "logps/rejected": -545.4634399414062, "loss": 0.5888, "rewards/accuracies": 0.75, "rewards/chosen": -1.4983834028244019, "rewards/margins": 0.4905606806278229, "rewards/rejected": -1.9889440536499023, "step": 7630 }, { "epoch": 1.0, "eval_logits/chosen": -2.234976291656494, "eval_logits/rejected": -2.243018627166748, "eval_logps/chosen": -480.9837951660156, "eval_logps/rejected": -493.3998107910156, "eval_loss": 0.6002876162528992, "eval_rewards/accuracies": 0.6744999885559082, "eval_rewards/chosen": -1.4823437929153442, "eval_rewards/margins": 0.44055426120758057, "eval_rewards/rejected": -1.9228979349136353, "eval_runtime": 196.934, "eval_samples_per_second": 10.156, "eval_steps_per_second": 5.078, "step": 7630 }, { "epoch": 1.0, "learning_rate": 2.609384119889313e-13, "logits/chosen": -2.3895089626312256, "logits/rejected": -2.3862829208374023, "logps/chosen": -467.02752685546875, "logps/rejected": -502.67852783203125, "loss": 0.5937, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.4769508838653564, "rewards/margins": 0.4055989384651184, "rewards/rejected": -1.8825498819351196, "step": 7640 }, { "epoch": 1.0, "eval_logits/chosen": -2.2347497940063477, "eval_logits/rejected": -2.242765188217163, "eval_logps/chosen": -481.00506591796875, "eval_logps/rejected": -493.4847717285156, "eval_loss": 0.599940299987793, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -1.4825562238693237, "eval_rewards/margins": 0.4411916732788086, "eval_rewards/rejected": -1.9237478971481323, "eval_runtime": 196.8595, "eval_samples_per_second": 10.16, "eval_steps_per_second": 5.08, "step": 7640 }, { "epoch": 1.0, "step": 7641, "total_flos": 0.0, "train_loss": 0.6145847465156994, "train_runtime": 171708.6447, "train_samples_per_second": 0.356, "train_steps_per_second": 0.044 } ], "logging_steps": 10, "max_steps": 7641, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }