diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,22964 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9998854993048172, + "eval_steps": 10, + "global_step": 7641, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 6.535947712418301e-09, + "logits/chosen": -3.0474565029144287, + "logits/rejected": -3.0019595623016357, + "logps/chosen": -250.30178833007812, + "logps/rejected": -231.682373046875, + "loss": 0.6933, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.00028943538200110197, + "rewards/margins": -0.0002489328326191753, + "rewards/rejected": -4.050254574394785e-05, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 6.535947712418302e-08, + "logits/chosen": -2.9978737831115723, + "logits/rejected": -3.0040385723114014, + "logps/chosen": -347.8559875488281, + "logps/rejected": -305.50567626953125, + "loss": 0.693, + "rewards/accuracies": 0.4305555522441864, + "rewards/chosen": -0.0002880638639908284, + "rewards/margins": 0.0003410349600017071, + "rewards/rejected": -0.0006290989113040268, + "step": 10 + }, + { + "epoch": 0.0, + "eval_logits/chosen": -2.7424161434173584, + "eval_logits/rejected": -2.7351112365722656, + "eval_logps/chosen": -332.7445373535156, + "eval_logps/rejected": -301.1111755371094, + "eval_loss": 0.6931213140487671, + "eval_rewards/accuracies": 0.4964999854564667, + "eval_rewards/chosen": 4.8589161451673135e-05, + "eval_rewards/margins": 6.048592695151456e-05, + "eval_rewards/rejected": -1.189680006064009e-05, + "eval_runtime": 196.7026, + "eval_samples_per_second": 10.168, + "eval_steps_per_second": 5.084, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 1.3071895424836603e-07, + "logits/chosen": -3.037752628326416, + "logits/rejected": -3.0157015323638916, + "logps/chosen": -326.78704833984375, + "logps/rejected": -328.48126220703125, + "loss": 0.6932, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.000477545807370916, + "rewards/margins": -0.00011621458543231711, + "rewards/rejected": 0.0005937603418715298, + "step": 20 + }, + { + "epoch": 0.0, + "eval_logits/chosen": -2.7427480220794678, + "eval_logits/rejected": -2.7354896068573, + "eval_logps/chosen": -332.742431640625, + "eval_logps/rejected": -301.1186218261719, + "eval_loss": 0.6930737495422363, + "eval_rewards/accuracies": 0.49799999594688416, + "eval_rewards/chosen": 6.974298594286665e-05, + "eval_rewards/margins": 0.00015557045117020607, + "eval_rewards/rejected": -8.582745067542419e-05, + "eval_runtime": 196.8696, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.08, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 1.9607843137254904e-07, + "logits/chosen": -2.990180492401123, + "logits/rejected": -2.9676098823547363, + "logps/chosen": -294.37188720703125, + "logps/rejected": -254.3704833984375, + "loss": 0.6931, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": 0.00014316548185888678, + "rewards/margins": 3.067384386667982e-05, + "rewards/rejected": 0.00011249161616433412, + "step": 30 + }, + { + "epoch": 0.0, + "eval_logits/chosen": -2.7423112392425537, + "eval_logits/rejected": -2.735067367553711, + "eval_logps/chosen": -332.74560546875, + "eval_logps/rejected": -301.10736083984375, + "eval_loss": 0.6931455731391907, + "eval_rewards/accuracies": 0.49300000071525574, + "eval_rewards/chosen": 3.810242560575716e-05, + "eval_rewards/margins": 1.1804982023022603e-05, + "eval_rewards/rejected": 2.629743903526105e-05, + "eval_runtime": 196.8402, + "eval_samples_per_second": 10.161, + "eval_steps_per_second": 5.08, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 2.6143790849673207e-07, + "logits/chosen": -3.080005168914795, + "logits/rejected": -2.9933598041534424, + "logps/chosen": -330.540771484375, + "logps/rejected": -295.5124816894531, + "loss": 0.6931, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": 0.00030000676633790135, + "rewards/margins": 0.00012870438513346016, + "rewards/rejected": 0.00017130242486018687, + "step": 40 + }, + { + "epoch": 0.01, + "eval_logits/chosen": -2.742424964904785, + "eval_logits/rejected": -2.7351646423339844, + "eval_logps/chosen": -332.74249267578125, + "eval_logps/rejected": -301.1159973144531, + "eval_loss": 0.6930870413780212, + "eval_rewards/accuracies": 0.5095000267028809, + "eval_rewards/chosen": 6.893646059324965e-05, + "eval_rewards/margins": 0.0001285538892261684, + "eval_rewards/rejected": -5.961741408100352e-05, + "eval_runtime": 196.9756, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 3.267973856209151e-07, + "logits/chosen": -3.063934564590454, + "logits/rejected": -3.077270984649658, + "logps/chosen": -284.46533203125, + "logps/rejected": -276.5115661621094, + "loss": 0.6935, + "rewards/accuracies": 0.4375, + "rewards/chosen": -0.00018404402362648398, + "rewards/margins": -0.0007704938179813325, + "rewards/rejected": 0.000586449692491442, + "step": 50 + }, + { + "epoch": 0.01, + "eval_logits/chosen": -2.7421915531158447, + "eval_logits/rejected": -2.734881639480591, + "eval_logps/chosen": -332.7299499511719, + "eval_logps/rejected": -301.101318359375, + "eval_loss": 0.6930976510047913, + "eval_rewards/accuracies": 0.49799999594688416, + "eval_rewards/chosen": 0.000194655847735703, + "eval_rewards/margins": 0.00010741400183178484, + "eval_rewards/rejected": 8.724184590391815e-05, + "eval_runtime": 196.923, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 3.921568627450981e-07, + "logits/chosen": -3.013704776763916, + "logits/rejected": -3.0369315147399902, + "logps/chosen": -328.0228271484375, + "logps/rejected": -295.39581298828125, + "loss": 0.6932, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.001045036711730063, + "rewards/margins": -0.00013430326362140477, + "rewards/rejected": -0.0009107333607971668, + "step": 60 + }, + { + "epoch": 0.01, + "eval_logits/chosen": -2.7421178817749023, + "eval_logits/rejected": -2.7349209785461426, + "eval_logps/chosen": -332.7392883300781, + "eval_logps/rejected": -301.1269836425781, + "eval_loss": 0.6930162906646729, + "eval_rewards/accuracies": 0.5099999904632568, + "eval_rewards/chosen": 0.00010118891077581793, + "eval_rewards/margins": 0.0002707123931031674, + "eval_rewards/rejected": -0.0001695234968792647, + "eval_runtime": 196.9045, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 4.5751633986928105e-07, + "logits/chosen": -3.069620132446289, + "logits/rejected": -3.0621676445007324, + "logps/chosen": -301.7582702636719, + "logps/rejected": -252.75460815429688, + "loss": 0.6933, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.0005456652725115418, + "rewards/margins": -0.0003828687476925552, + "rewards/rejected": -0.00016279640840366483, + "step": 70 + }, + { + "epoch": 0.01, + "eval_logits/chosen": -2.7423887252807617, + "eval_logits/rejected": -2.7351789474487305, + "eval_logps/chosen": -332.7414855957031, + "eval_logps/rejected": -301.119384765625, + "eval_loss": 0.6930652856826782, + "eval_rewards/accuracies": 0.5210000276565552, + "eval_rewards/chosen": 7.922769873403013e-05, + "eval_rewards/margins": 0.00017279147868975997, + "eval_rewards/rejected": -9.356377995572984e-05, + "eval_runtime": 196.7866, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 5.082, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 5.228758169934641e-07, + "logits/chosen": -3.0048069953918457, + "logits/rejected": -3.002398729324341, + "logps/chosen": -354.9811096191406, + "logps/rejected": -344.3815002441406, + "loss": 0.693, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": 0.00019456178415566683, + "rewards/margins": 0.00027323514223098755, + "rewards/rejected": -7.867337990319356e-05, + "step": 80 + }, + { + "epoch": 0.01, + "eval_logits/chosen": -2.7424306869506836, + "eval_logits/rejected": -2.7352046966552734, + "eval_logps/chosen": -332.7282409667969, + "eval_logps/rejected": -301.10968017578125, + "eval_loss": 0.6930477023124695, + "eval_rewards/accuracies": 0.5009999871253967, + "eval_rewards/chosen": 0.00021137729345355183, + "eval_rewards/margins": 0.0002077910612570122, + "eval_rewards/rejected": 3.5862587992596673e-06, + "eval_runtime": 196.8147, + "eval_samples_per_second": 10.162, + "eval_steps_per_second": 5.081, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 5.882352941176471e-07, + "logits/chosen": -3.1034655570983887, + "logits/rejected": -3.0644783973693848, + "logps/chosen": -319.18951416015625, + "logps/rejected": -283.13232421875, + "loss": 0.6933, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": 0.0005190152442082763, + "rewards/margins": -0.00037205187254585326, + "rewards/rejected": 0.0008910670876502991, + "step": 90 + }, + { + "epoch": 0.01, + "eval_logits/chosen": -2.7423925399780273, + "eval_logits/rejected": -2.735030174255371, + "eval_logps/chosen": -332.74163818359375, + "eval_logps/rejected": -301.1065979003906, + "eval_loss": 0.6931295394897461, + "eval_rewards/accuracies": 0.4975000023841858, + "eval_rewards/chosen": 7.795435521984473e-05, + "eval_rewards/margins": 4.392163464217447e-05, + "eval_rewards/rejected": 3.403272057767026e-05, + "eval_runtime": 196.7725, + "eval_samples_per_second": 10.164, + "eval_steps_per_second": 5.082, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 6.535947712418302e-07, + "logits/chosen": -3.0161807537078857, + "logits/rejected": -3.0300960540771484, + "logps/chosen": -290.94915771484375, + "logps/rejected": -294.79486083984375, + "loss": 0.6935, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.00010955418838420883, + "rewards/margins": -0.0007335743284784257, + "rewards/rejected": 0.0006240200018510222, + "step": 100 + }, + { + "epoch": 0.01, + "eval_logits/chosen": -2.7424075603485107, + "eval_logits/rejected": -2.735180377960205, + "eval_logps/chosen": -332.7455749511719, + "eval_logps/rejected": -301.0979309082031, + "eval_loss": 0.6931926608085632, + "eval_rewards/accuracies": 0.49399998784065247, + "eval_rewards/chosen": 3.826828833553009e-05, + "eval_rewards/margins": -8.247687219409272e-05, + "eval_rewards/rejected": 0.00012074514233972877, + "eval_runtime": 196.9629, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 7.189542483660131e-07, + "logits/chosen": -3.028716564178467, + "logits/rejected": -3.024019479751587, + "logps/chosen": -340.0860595703125, + "logps/rejected": -301.8324890136719, + "loss": 0.6925, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.0008878801017999649, + "rewards/margins": 0.001357215573079884, + "rewards/rejected": -0.00046933552948758006, + "step": 110 + }, + { + "epoch": 0.01, + "eval_logits/chosen": -2.7424967288970947, + "eval_logits/rejected": -2.7352287769317627, + "eval_logps/chosen": -332.75115966796875, + "eval_logps/rejected": -301.1202697753906, + "eval_loss": 0.693109393119812, + "eval_rewards/accuracies": 0.4894999861717224, + "eval_rewards/chosen": -1.78045538632432e-05, + "eval_rewards/margins": 8.483259443892166e-05, + "eval_rewards/rejected": -0.00010263712465530261, + "eval_runtime": 196.8794, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.079, + "step": 110 + }, + { + "epoch": 0.02, + "learning_rate": 7.843137254901962e-07, + "logits/chosen": -3.083116054534912, + "logits/rejected": -3.059950590133667, + "logps/chosen": -355.63006591796875, + "logps/rejected": -278.5542907714844, + "loss": 0.6927, + "rewards/accuracies": 0.5625, + "rewards/chosen": 0.0005385326221585274, + "rewards/margins": 0.0008816570043563843, + "rewards/rejected": -0.0003431244404055178, + "step": 120 + }, + { + "epoch": 0.02, + "eval_logits/chosen": -2.7423059940338135, + "eval_logits/rejected": -2.735048770904541, + "eval_logps/chosen": -332.7540283203125, + "eval_logps/rejected": -301.1204833984375, + "eval_loss": 0.6931224465370178, + "eval_rewards/accuracies": 0.492000013589859, + "eval_rewards/chosen": -4.632035779650323e-05, + "eval_rewards/margins": 5.811014852952212e-05, + "eval_rewards/rejected": -0.00010443051723996177, + "eval_runtime": 197.0438, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 8.496732026143792e-07, + "logits/chosen": -3.0233044624328613, + "logits/rejected": -3.0531599521636963, + "logps/chosen": -311.4083557128906, + "logps/rejected": -283.33258056640625, + "loss": 0.6931, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -6.241817118279869e-06, + "rewards/margins": 0.00016770794172771275, + "rewards/rejected": -0.00017394970927853137, + "step": 130 + }, + { + "epoch": 0.02, + "eval_logits/chosen": -2.742424249649048, + "eval_logits/rejected": -2.735227346420288, + "eval_logps/chosen": -332.7500305175781, + "eval_logps/rejected": -301.1299133300781, + "eval_loss": 0.6930555701255798, + "eval_rewards/accuracies": 0.5084999799728394, + "eval_rewards/chosen": -6.5807921600935515e-06, + "eval_rewards/margins": 0.00019208044977858663, + "eval_rewards/rejected": -0.00019866121874656528, + "eval_runtime": 197.2122, + "eval_samples_per_second": 10.141, + "eval_steps_per_second": 5.071, + "step": 130 + }, + { + "epoch": 0.02, + "learning_rate": 9.150326797385621e-07, + "logits/chosen": -2.984133243560791, + "logits/rejected": -2.9490771293640137, + "logps/chosen": -327.62127685546875, + "logps/rejected": -287.3335266113281, + "loss": 0.6932, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.0002134219103027135, + "rewards/margins": -4.010857082903385e-05, + "rewards/rejected": -0.00017331326671410352, + "step": 140 + }, + { + "epoch": 0.02, + "eval_logits/chosen": -2.742417812347412, + "eval_logits/rejected": -2.7351322174072266, + "eval_logps/chosen": -332.7521667480469, + "eval_logps/rejected": -301.12445068359375, + "eval_loss": 0.6930928826332092, + "eval_rewards/accuracies": 0.4984999895095825, + "eval_rewards/chosen": -2.7572192266234197e-05, + "eval_rewards/margins": 0.00011697168520186096, + "eval_rewards/rejected": -0.00014454391202889383, + "eval_runtime": 196.8258, + "eval_samples_per_second": 10.161, + "eval_steps_per_second": 5.081, + "step": 140 + }, + { + "epoch": 0.02, + "learning_rate": 9.80392156862745e-07, + "logits/chosen": -3.0864033699035645, + "logits/rejected": -3.026364803314209, + "logps/chosen": -402.3047790527344, + "logps/rejected": -355.61175537109375, + "loss": 0.6925, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0004579071537591517, + "rewards/margins": 0.0012378387618809938, + "rewards/rejected": -0.0007799316081218421, + "step": 150 + }, + { + "epoch": 0.02, + "eval_logits/chosen": -2.74202823638916, + "eval_logits/rejected": -2.7348246574401855, + "eval_logps/chosen": -332.7433776855469, + "eval_logps/rejected": -301.11444091796875, + "eval_loss": 0.6930994987487793, + "eval_rewards/accuracies": 0.5015000104904175, + "eval_rewards/chosen": 6.008195850881748e-05, + "eval_rewards/margins": 0.00010428918903926387, + "eval_rewards/rejected": -4.4207245082361624e-05, + "eval_runtime": 197.2211, + "eval_samples_per_second": 10.141, + "eval_steps_per_second": 5.07, + "step": 150 + }, + { + "epoch": 0.02, + "learning_rate": 1.0457516339869283e-06, + "logits/chosen": -3.0945613384246826, + "logits/rejected": -3.0280470848083496, + "logps/chosen": -308.03509521484375, + "logps/rejected": -273.6764221191406, + "loss": 0.6931, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": -9.9400152976159e-05, + "rewards/margins": 0.0001606243458809331, + "rewards/rejected": -0.0002600245352368802, + "step": 160 + }, + { + "epoch": 0.02, + "eval_logits/chosen": -2.741826057434082, + "eval_logits/rejected": -2.7345821857452393, + "eval_logps/chosen": -332.755615234375, + "eval_logps/rejected": -301.11944580078125, + "eval_loss": 0.6931356191635132, + "eval_rewards/accuracies": 0.4934999942779541, + "eval_rewards/chosen": -6.196425965754315e-05, + "eval_rewards/margins": 3.2061645470093936e-05, + "eval_rewards/rejected": -9.402589057572186e-05, + "eval_runtime": 196.7162, + "eval_samples_per_second": 10.167, + "eval_steps_per_second": 5.083, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 1.111111111111111e-06, + "logits/chosen": -3.0090110301971436, + "logits/rejected": -3.0139718055725098, + "logps/chosen": -300.4590148925781, + "logps/rejected": -270.7416076660156, + "loss": 0.693, + "rewards/accuracies": 0.5625, + "rewards/chosen": 2.0842790036113e-05, + "rewards/margins": 0.0002311690041096881, + "rewards/rejected": -0.0002103261649608612, + "step": 170 + }, + { + "epoch": 0.02, + "eval_logits/chosen": -2.7421629428863525, + "eval_logits/rejected": -2.7350175380706787, + "eval_logps/chosen": -332.75689697265625, + "eval_logps/rejected": -301.1203308105469, + "eval_loss": 0.6931375861167908, + "eval_rewards/accuracies": 0.4925000071525574, + "eval_rewards/chosen": -7.511243893532082e-05, + "eval_rewards/margins": 2.809734723996371e-05, + "eval_rewards/rejected": -0.0001032097716233693, + "eval_runtime": 196.9725, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 170 + }, + { + "epoch": 0.02, + "learning_rate": 1.1764705882352942e-06, + "logits/chosen": -3.080929756164551, + "logits/rejected": -2.997765302658081, + "logps/chosen": -412.30596923828125, + "logps/rejected": -311.81390380859375, + "loss": 0.6938, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.0013919133925810456, + "rewards/margins": -0.0012244291137903929, + "rewards/rejected": -0.00016748439520597458, + "step": 180 + }, + { + "epoch": 0.02, + "eval_logits/chosen": -2.742008924484253, + "eval_logits/rejected": -2.734745979309082, + "eval_logps/chosen": -332.7729797363281, + "eval_logps/rejected": -301.13446044921875, + "eval_loss": 0.693146824836731, + "eval_rewards/accuracies": 0.5, + "eval_rewards/chosen": -0.0002355735341552645, + "eval_rewards/margins": 9.156420674116816e-06, + "eval_rewards/rejected": -0.0002447299484629184, + "eval_runtime": 196.9856, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 180 + }, + { + "epoch": 0.02, + "learning_rate": 1.2418300653594772e-06, + "logits/chosen": -2.985764980316162, + "logits/rejected": -2.968858480453491, + "logps/chosen": -301.422607421875, + "logps/rejected": -254.9043426513672, + "loss": 0.6929, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.0002511686470825225, + "rewards/margins": 0.00046358705731108785, + "rewards/rejected": -0.000714755617082119, + "step": 190 + }, + { + "epoch": 0.02, + "eval_logits/chosen": -2.742161273956299, + "eval_logits/rejected": -2.7349143028259277, + "eval_logps/chosen": -332.7518005371094, + "eval_logps/rejected": -301.1442565917969, + "eval_loss": 0.6929922699928284, + "eval_rewards/accuracies": 0.5260000228881836, + "eval_rewards/chosen": -2.4143202608684078e-05, + "eval_rewards/margins": 0.00031821097945794463, + "eval_rewards/rejected": -0.0003423541784286499, + "eval_runtime": 196.8806, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 190 + }, + { + "epoch": 0.03, + "learning_rate": 1.3071895424836604e-06, + "logits/chosen": -3.056624174118042, + "logits/rejected": -3.0141055583953857, + "logps/chosen": -298.284912109375, + "logps/rejected": -298.84381103515625, + "loss": 0.6924, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 9.34753697947599e-05, + "rewards/margins": 0.0014477561926469207, + "rewards/rejected": -0.0013542806264013052, + "step": 200 + }, + { + "epoch": 0.03, + "eval_logits/chosen": -2.7422404289245605, + "eval_logits/rejected": -2.7349939346313477, + "eval_logps/chosen": -332.7804260253906, + "eval_logps/rejected": -301.1587829589844, + "eval_loss": 0.693062961101532, + "eval_rewards/accuracies": 0.5040000081062317, + "eval_rewards/chosen": -0.00031018684967420995, + "eval_rewards/margins": 0.00017730562831275165, + "eval_rewards/rejected": -0.00048749250709079206, + "eval_runtime": 197.0551, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 200 + }, + { + "epoch": 0.03, + "learning_rate": 1.3725490196078434e-06, + "logits/chosen": -3.103909730911255, + "logits/rejected": -3.096003293991089, + "logps/chosen": -335.46551513671875, + "logps/rejected": -304.6739196777344, + "loss": 0.6932, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.000499956717249006, + "rewards/margins": -8.5618878074456e-05, + "rewards/rejected": -0.00041433790465816855, + "step": 210 + }, + { + "epoch": 0.03, + "eval_logits/chosen": -2.7421951293945312, + "eval_logits/rejected": -2.7350261211395264, + "eval_logps/chosen": -332.7716979980469, + "eval_logps/rejected": -301.14794921875, + "eval_loss": 0.6930733919143677, + "eval_rewards/accuracies": 0.5049999952316284, + "eval_rewards/chosen": -0.00022285518934950233, + "eval_rewards/margins": 0.0001561456301715225, + "eval_rewards/rejected": -0.0003790008195210248, + "eval_runtime": 196.773, + "eval_samples_per_second": 10.164, + "eval_steps_per_second": 5.082, + "step": 210 + }, + { + "epoch": 0.03, + "learning_rate": 1.4379084967320261e-06, + "logits/chosen": -3.006915330886841, + "logits/rejected": -2.964789628982544, + "logps/chosen": -329.61102294921875, + "logps/rejected": -297.4481506347656, + "loss": 0.6934, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.000635097618214786, + "rewards/margins": -0.0005649608210660517, + "rewards/rejected": -7.013681170064956e-05, + "step": 220 + }, + { + "epoch": 0.03, + "eval_logits/chosen": -2.741764783859253, + "eval_logits/rejected": -2.734501600265503, + "eval_logps/chosen": -332.7756042480469, + "eval_logps/rejected": -301.16070556640625, + "eval_loss": 0.6930290460586548, + "eval_rewards/accuracies": 0.4964999854564667, + "eval_rewards/chosen": -0.00026174308732151985, + "eval_rewards/margins": 0.00024544313782826066, + "eval_rewards/rejected": -0.0005071861669421196, + "eval_runtime": 197.1268, + "eval_samples_per_second": 10.146, + "eval_steps_per_second": 5.073, + "step": 220 + }, + { + "epoch": 0.03, + "learning_rate": 1.5032679738562091e-06, + "logits/chosen": -2.974337577819824, + "logits/rejected": -2.9849658012390137, + "logps/chosen": -280.42303466796875, + "logps/rejected": -321.53265380859375, + "loss": 0.693, + "rewards/accuracies": 0.48750001192092896, + "rewards/chosen": -0.0003795806842390448, + "rewards/margins": 0.00029520891257561743, + "rewards/rejected": -0.000674789713229984, + "step": 230 + }, + { + "epoch": 0.03, + "eval_logits/chosen": -2.742231845855713, + "eval_logits/rejected": -2.7350893020629883, + "eval_logps/chosen": -332.79168701171875, + "eval_logps/rejected": -301.2016906738281, + "eval_loss": 0.6929041147232056, + "eval_rewards/accuracies": 0.5289999842643738, + "eval_rewards/chosen": -0.0004225261218380183, + "eval_rewards/margins": 0.0004941746010445058, + "eval_rewards/rejected": -0.0009167007519863546, + "eval_runtime": 197.2194, + "eval_samples_per_second": 10.141, + "eval_steps_per_second": 5.07, + "step": 230 + }, + { + "epoch": 0.03, + "learning_rate": 1.5686274509803923e-06, + "logits/chosen": -2.9993062019348145, + "logits/rejected": -3.0187880992889404, + "logps/chosen": -287.79766845703125, + "logps/rejected": -308.9731750488281, + "loss": 0.6928, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.0008283854695037007, + "rewards/margins": 0.0007049053674563766, + "rewards/rejected": -0.0015332909533753991, + "step": 240 + }, + { + "epoch": 0.03, + "eval_logits/chosen": -2.742602825164795, + "eval_logits/rejected": -2.7353687286376953, + "eval_logps/chosen": -332.8048095703125, + "eval_logps/rejected": -301.19207763671875, + "eval_loss": 0.6930183172225952, + "eval_rewards/accuracies": 0.5174999833106995, + "eval_rewards/chosen": -0.0005539001431316137, + "eval_rewards/margins": 0.00026647234335541725, + "eval_rewards/rejected": -0.000820372486487031, + "eval_runtime": 197.1539, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 240 + }, + { + "epoch": 0.03, + "learning_rate": 1.6339869281045753e-06, + "logits/chosen": -3.0508596897125244, + "logits/rejected": -3.0222580432891846, + "logps/chosen": -373.35943603515625, + "logps/rejected": -316.1955871582031, + "loss": 0.6935, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0007197518716566265, + "rewards/margins": -0.000629595888312906, + "rewards/rejected": -9.015606337925419e-05, + "step": 250 + }, + { + "epoch": 0.03, + "eval_logits/chosen": -2.7422688007354736, + "eval_logits/rejected": -2.735180139541626, + "eval_logps/chosen": -332.8021240234375, + "eval_logps/rejected": -301.2145080566406, + "eval_loss": 0.6928929686546326, + "eval_rewards/accuracies": 0.5370000004768372, + "eval_rewards/chosen": -0.0005272579728625715, + "eval_rewards/margins": 0.0005174549296498299, + "eval_rewards/rejected": -0.0010447128443047404, + "eval_runtime": 197.2346, + "eval_samples_per_second": 10.14, + "eval_steps_per_second": 5.07, + "step": 250 + }, + { + "epoch": 0.03, + "learning_rate": 1.6993464052287585e-06, + "logits/chosen": -3.073085308074951, + "logits/rejected": -3.0740902423858643, + "logps/chosen": -346.2541809082031, + "logps/rejected": -297.8416748046875, + "loss": 0.6934, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.0008511870983056724, + "rewards/margins": -0.0004448608378879726, + "rewards/rejected": -0.0004063262604176998, + "step": 260 + }, + { + "epoch": 0.03, + "eval_logits/chosen": -2.742314100265503, + "eval_logits/rejected": -2.73513126373291, + "eval_logps/chosen": -332.82208251953125, + "eval_logps/rejected": -301.2392272949219, + "eval_loss": 0.6928689479827881, + "eval_rewards/accuracies": 0.5274999737739563, + "eval_rewards/chosen": -0.0007265734602697194, + "eval_rewards/margins": 0.0005655785789713264, + "eval_rewards/rejected": -0.0012921523302793503, + "eval_runtime": 196.9217, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 260 + }, + { + "epoch": 0.04, + "learning_rate": 1.7647058823529414e-06, + "logits/chosen": -3.002138137817383, + "logits/rejected": -2.992426872253418, + "logps/chosen": -294.773681640625, + "logps/rejected": -267.15521240234375, + "loss": 0.6937, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": -0.0016117170453071594, + "rewards/margins": -0.0011218027211725712, + "rewards/rejected": -0.0004899144405499101, + "step": 270 + }, + { + "epoch": 0.04, + "eval_logits/chosen": -2.7421512603759766, + "eval_logits/rejected": -2.734945774078369, + "eval_logps/chosen": -332.82757568359375, + "eval_logps/rejected": -301.2431335449219, + "eval_loss": 0.692876935005188, + "eval_rewards/accuracies": 0.5149999856948853, + "eval_rewards/chosen": -0.0007814643904566765, + "eval_rewards/margins": 0.0005501382402144372, + "eval_rewards/rejected": -0.0013316025724634528, + "eval_runtime": 196.9036, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 270 + }, + { + "epoch": 0.04, + "learning_rate": 1.8300653594771242e-06, + "logits/chosen": -3.0527281761169434, + "logits/rejected": -3.0102436542510986, + "logps/chosen": -380.1999206542969, + "logps/rejected": -336.00726318359375, + "loss": 0.6925, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.0011029274901375175, + "rewards/margins": 0.0013822071487084031, + "rewards/rejected": -0.002485134406015277, + "step": 280 + }, + { + "epoch": 0.04, + "eval_logits/chosen": -2.742446184158325, + "eval_logits/rejected": -2.7353570461273193, + "eval_logps/chosen": -332.843505859375, + "eval_logps/rejected": -301.2621765136719, + "eval_loss": 0.6928617358207703, + "eval_rewards/accuracies": 0.5270000100135803, + "eval_rewards/chosen": -0.0009410838829353452, + "eval_rewards/margins": 0.0005805276450701058, + "eval_rewards/rejected": -0.0015216115862131119, + "eval_runtime": 197.0489, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 280 + }, + { + "epoch": 0.04, + "learning_rate": 1.8954248366013072e-06, + "logits/chosen": -3.0255136489868164, + "logits/rejected": -3.032275676727295, + "logps/chosen": -337.3700256347656, + "logps/rejected": -302.2608642578125, + "loss": 0.6927, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.0005425974959507585, + "rewards/margins": 0.0008994883974082768, + "rewards/rejected": -0.0014420859515666962, + "step": 290 + }, + { + "epoch": 0.04, + "eval_logits/chosen": -2.741929292678833, + "eval_logits/rejected": -2.734889030456543, + "eval_logps/chosen": -332.8692626953125, + "eval_logps/rejected": -301.28790283203125, + "eval_loss": 0.6928617358207703, + "eval_rewards/accuracies": 0.5320000052452087, + "eval_rewards/chosen": -0.001198362559080124, + "eval_rewards/margins": 0.0005806823610328138, + "eval_rewards/rejected": -0.0017790448619052768, + "eval_runtime": 197.0895, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 290 + }, + { + "epoch": 0.04, + "learning_rate": 1.96078431372549e-06, + "logits/chosen": -3.123136281967163, + "logits/rejected": -3.0728859901428223, + "logps/chosen": -359.8968200683594, + "logps/rejected": -293.3435974121094, + "loss": 0.693, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.0015214609447866678, + "rewards/margins": 0.0003471664385870099, + "rewards/rejected": -0.001868627266958356, + "step": 300 + }, + { + "epoch": 0.04, + "eval_logits/chosen": -2.7420578002929688, + "eval_logits/rejected": -2.734994411468506, + "eval_logps/chosen": -332.8953857421875, + "eval_logps/rejected": -301.3324890136719, + "eval_loss": 0.6927695870399475, + "eval_rewards/accuracies": 0.5394999980926514, + "eval_rewards/chosen": -0.001459623803384602, + "eval_rewards/margins": 0.000765010598115623, + "eval_rewards/rejected": -0.002224634401500225, + "eval_runtime": 197.096, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.074, + "step": 300 + }, + { + "epoch": 0.04, + "learning_rate": 2.0261437908496734e-06, + "logits/chosen": -2.9720630645751953, + "logits/rejected": -2.9839987754821777, + "logps/chosen": -356.2770080566406, + "logps/rejected": -334.4881286621094, + "loss": 0.6922, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.0012176050804555416, + "rewards/margins": 0.0018635308369994164, + "rewards/rejected": -0.003081135917454958, + "step": 310 + }, + { + "epoch": 0.04, + "eval_logits/chosen": -2.7417304515838623, + "eval_logits/rejected": -2.7347323894500732, + "eval_logps/chosen": -332.9208068847656, + "eval_logps/rejected": -301.3729248046875, + "eval_loss": 0.692695140838623, + "eval_rewards/accuracies": 0.5509999990463257, + "eval_rewards/chosen": -0.0017142510041594505, + "eval_rewards/margins": 0.0009148998069576919, + "eval_rewards/rejected": -0.0026291508693248034, + "eval_runtime": 197.0911, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 310 + }, + { + "epoch": 0.04, + "learning_rate": 2.0915032679738565e-06, + "logits/chosen": -3.0705294609069824, + "logits/rejected": -3.030722141265869, + "logps/chosen": -320.76031494140625, + "logps/rejected": -293.1005859375, + "loss": 0.693, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": -0.002190458821132779, + "rewards/margins": 0.0002239603054476902, + "rewards/rejected": -0.00241441885009408, + "step": 320 + }, + { + "epoch": 0.04, + "eval_logits/chosen": -2.7417984008789062, + "eval_logits/rejected": -2.734755039215088, + "eval_logps/chosen": -332.95001220703125, + "eval_logps/rejected": -301.4145202636719, + "eval_loss": 0.6926332712173462, + "eval_rewards/accuracies": 0.5584999918937683, + "eval_rewards/chosen": -0.002005940768867731, + "eval_rewards/margins": 0.0010391019750386477, + "eval_rewards/rejected": -0.003045042511075735, + "eval_runtime": 196.943, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.078, + "step": 320 + }, + { + "epoch": 0.04, + "learning_rate": 2.1568627450980393e-06, + "logits/chosen": -3.1108169555664062, + "logits/rejected": -3.049923896789551, + "logps/chosen": -356.15911865234375, + "logps/rejected": -295.27740478515625, + "loss": 0.6927, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0020984704606235027, + "rewards/margins": 0.000873108278028667, + "rewards/rejected": -0.0029715788550674915, + "step": 330 + }, + { + "epoch": 0.04, + "eval_logits/chosen": -2.741525650024414, + "eval_logits/rejected": -2.734499454498291, + "eval_logps/chosen": -332.9777526855469, + "eval_logps/rejected": -301.4495544433594, + "eval_loss": 0.6925971508026123, + "eval_rewards/accuracies": 0.5519999861717224, + "eval_rewards/chosen": -0.002283054403960705, + "eval_rewards/margins": 0.0011124503798782825, + "eval_rewards/rejected": -0.0033955047838389874, + "eval_runtime": 197.0557, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 330 + }, + { + "epoch": 0.04, + "learning_rate": 2.222222222222222e-06, + "logits/chosen": -3.033808469772339, + "logits/rejected": -2.980825185775757, + "logps/chosen": -319.2254333496094, + "logps/rejected": -262.8683166503906, + "loss": 0.6924, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.0029052558820694685, + "rewards/margins": 0.0014379608910530806, + "rewards/rejected": -0.004343216773122549, + "step": 340 + }, + { + "epoch": 0.04, + "eval_logits/chosen": -2.7416186332702637, + "eval_logits/rejected": -2.734678268432617, + "eval_logps/chosen": -333.0244445800781, + "eval_logps/rejected": -301.525634765625, + "eval_loss": 0.6924512386322021, + "eval_rewards/accuracies": 0.5600000023841858, + "eval_rewards/chosen": -0.0027503310702741146, + "eval_rewards/margins": 0.0014057998778298497, + "eval_rewards/rejected": -0.00415613129734993, + "eval_runtime": 197.052, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 340 + }, + { + "epoch": 0.05, + "learning_rate": 2.2875816993464053e-06, + "logits/chosen": -3.092961072921753, + "logits/rejected": -3.0653040409088135, + "logps/chosen": -388.9897155761719, + "logps/rejected": -309.00787353515625, + "loss": 0.6921, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.0030721924267709255, + "rewards/margins": 0.002087064553052187, + "rewards/rejected": -0.0051592574454844, + "step": 350 + }, + { + "epoch": 0.05, + "eval_logits/chosen": -2.741395950317383, + "eval_logits/rejected": -2.73449444770813, + "eval_logps/chosen": -333.0765380859375, + "eval_logps/rejected": -301.61993408203125, + "eval_loss": 0.6922417283058167, + "eval_rewards/accuracies": 0.5724999904632568, + "eval_rewards/chosen": -0.0032712086103856564, + "eval_rewards/margins": 0.0018283347599208355, + "eval_rewards/rejected": -0.0050995429046452045, + "eval_runtime": 196.9644, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 350 + }, + { + "epoch": 0.05, + "learning_rate": 2.3529411764705885e-06, + "logits/chosen": -3.0397391319274902, + "logits/rejected": -2.995060682296753, + "logps/chosen": -311.85931396484375, + "logps/rejected": -305.2904968261719, + "loss": 0.6918, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.003052308689802885, + "rewards/margins": 0.002707479055970907, + "rewards/rejected": -0.005759787745773792, + "step": 360 + }, + { + "epoch": 0.05, + "eval_logits/chosen": -2.7416577339172363, + "eval_logits/rejected": -2.7348086833953857, + "eval_logps/chosen": -333.14324951171875, + "eval_logps/rejected": -301.695556640625, + "eval_loss": 0.6921982169151306, + "eval_rewards/accuracies": 0.5835000276565552, + "eval_rewards/chosen": -0.003938698675483465, + "eval_rewards/margins": 0.0019165691919624805, + "eval_rewards/rejected": -0.005855268333107233, + "eval_runtime": 197.1033, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.073, + "step": 360 + }, + { + "epoch": 0.05, + "learning_rate": 2.4183006535947716e-06, + "logits/chosen": -2.9846248626708984, + "logits/rejected": -3.012056350708008, + "logps/chosen": -320.0126647949219, + "logps/rejected": -288.37353515625, + "loss": 0.6918, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.004556222353130579, + "rewards/margins": 0.0027006464079022408, + "rewards/rejected": -0.007256869226694107, + "step": 370 + }, + { + "epoch": 0.05, + "eval_logits/chosen": -2.7418692111968994, + "eval_logits/rejected": -2.73514461517334, + "eval_logps/chosen": -333.22711181640625, + "eval_logps/rejected": -301.8189392089844, + "eval_loss": 0.6920028328895569, + "eval_rewards/accuracies": 0.5924999713897705, + "eval_rewards/chosen": -0.0047774785198271275, + "eval_rewards/margins": 0.00231174030341208, + "eval_rewards/rejected": -0.007089219056069851, + "eval_runtime": 197.0339, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 370 + }, + { + "epoch": 0.05, + "learning_rate": 2.4836601307189544e-06, + "logits/chosen": -3.0387444496154785, + "logits/rejected": -3.033735752105713, + "logps/chosen": -344.07342529296875, + "logps/rejected": -296.765380859375, + "loss": 0.6905, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.003909797873347998, + "rewards/margins": 0.0054032644256949425, + "rewards/rejected": -0.009313062764704227, + "step": 380 + }, + { + "epoch": 0.05, + "eval_logits/chosen": -2.7420990467071533, + "eval_logits/rejected": -2.735419511795044, + "eval_logps/chosen": -333.339599609375, + "eval_logps/rejected": -301.9618835449219, + "eval_loss": 0.6918540596961975, + "eval_rewards/accuracies": 0.5855000019073486, + "eval_rewards/chosen": -0.005901523865759373, + "eval_rewards/margins": 0.002617142628878355, + "eval_rewards/rejected": -0.00851866602897644, + "eval_runtime": 196.9144, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.078, + "step": 380 + }, + { + "epoch": 0.05, + "learning_rate": 2.549019607843137e-06, + "logits/chosen": -2.9668126106262207, + "logits/rejected": -2.9344594478607178, + "logps/chosen": -352.40155029296875, + "logps/rejected": -289.9767150878906, + "loss": 0.691, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.005256765056401491, + "rewards/margins": 0.004349336959421635, + "rewards/rejected": -0.009606102481484413, + "step": 390 + }, + { + "epoch": 0.05, + "eval_logits/chosen": -2.7425131797790527, + "eval_logits/rejected": -2.735957622528076, + "eval_logps/chosen": -333.4939880371094, + "eval_logps/rejected": -302.1752624511719, + "eval_loss": 0.6915651559829712, + "eval_rewards/accuracies": 0.5924999713897705, + "eval_rewards/chosen": -0.0074457875452935696, + "eval_rewards/margins": 0.003206492168828845, + "eval_rewards/rejected": -0.010652278549969196, + "eval_runtime": 196.9498, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 390 + }, + { + "epoch": 0.05, + "learning_rate": 2.6143790849673208e-06, + "logits/chosen": -3.0899410247802734, + "logits/rejected": -3.1250669956207275, + "logps/chosen": -339.81781005859375, + "logps/rejected": -345.57635498046875, + "loss": 0.6929, + "rewards/accuracies": 0.48750001192092896, + "rewards/chosen": -0.008469333872199059, + "rewards/margins": 0.0005970595520921052, + "rewards/rejected": -0.009066394530236721, + "step": 400 + }, + { + "epoch": 0.05, + "eval_logits/chosen": -2.742565155029297, + "eval_logits/rejected": -2.7361464500427246, + "eval_logps/chosen": -333.64605712890625, + "eval_logps/rejected": -302.40118408203125, + "eval_loss": 0.6912031173706055, + "eval_rewards/accuracies": 0.6004999876022339, + "eval_rewards/chosen": -0.008966467343270779, + "eval_rewards/margins": 0.0039451997727155685, + "eval_rewards/rejected": -0.012911667115986347, + "eval_runtime": 196.9145, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.078, + "step": 400 + }, + { + "epoch": 0.05, + "learning_rate": 2.6797385620915036e-06, + "logits/chosen": -3.0014090538024902, + "logits/rejected": -2.987090587615967, + "logps/chosen": -300.54693603515625, + "logps/rejected": -255.03335571289062, + "loss": 0.6911, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.010720537044107914, + "rewards/margins": 0.004079930018633604, + "rewards/rejected": -0.01480046845972538, + "step": 410 + }, + { + "epoch": 0.05, + "eval_logits/chosen": -2.7424871921539307, + "eval_logits/rejected": -2.7361936569213867, + "eval_logps/chosen": -333.7640380859375, + "eval_logps/rejected": -302.5548095703125, + "eval_loss": 0.6910296678543091, + "eval_rewards/accuracies": 0.6000000238418579, + "eval_rewards/chosen": -0.01014601718634367, + "eval_rewards/margins": 0.004301996435970068, + "eval_rewards/rejected": -0.01444801315665245, + "eval_runtime": 197.0865, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 410 + }, + { + "epoch": 0.05, + "learning_rate": 2.7450980392156867e-06, + "logits/chosen": -3.0133562088012695, + "logits/rejected": -2.994236469268799, + "logps/chosen": -336.31365966796875, + "logps/rejected": -323.908935546875, + "loss": 0.6914, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.010945516638457775, + "rewards/margins": 0.0036502934526652098, + "rewards/rejected": -0.014595809392631054, + "step": 420 + }, + { + "epoch": 0.05, + "eval_logits/chosen": -2.742440938949585, + "eval_logits/rejected": -2.7361104488372803, + "eval_logps/chosen": -333.8959655761719, + "eval_logps/rejected": -302.7467041015625, + "eval_loss": 0.6907373070716858, + "eval_rewards/accuracies": 0.6044999957084656, + "eval_rewards/chosen": -0.011465570889413357, + "eval_rewards/margins": 0.004901566542685032, + "eval_rewards/rejected": -0.01636713556945324, + "eval_runtime": 197.0399, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 420 + }, + { + "epoch": 0.06, + "learning_rate": 2.8104575163398695e-06, + "logits/chosen": -3.0821690559387207, + "logits/rejected": -3.0689940452575684, + "logps/chosen": -339.2743225097656, + "logps/rejected": -310.23028564453125, + "loss": 0.6921, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.014210025779902935, + "rewards/margins": 0.002133010420948267, + "rewards/rejected": -0.01634303480386734, + "step": 430 + }, + { + "epoch": 0.06, + "eval_logits/chosen": -2.7430331707000732, + "eval_logits/rejected": -2.7368240356445312, + "eval_logps/chosen": -334.0163269042969, + "eval_logps/rejected": -302.885498046875, + "eval_loss": 0.6906515955924988, + "eval_rewards/accuracies": 0.590499997138977, + "eval_rewards/chosen": -0.012668982148170471, + "eval_rewards/margins": 0.005086148623377085, + "eval_rewards/rejected": -0.017755132168531418, + "eval_runtime": 196.9624, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 430 + }, + { + "epoch": 0.06, + "learning_rate": 2.8758169934640523e-06, + "logits/chosen": -3.0886878967285156, + "logits/rejected": -3.089543342590332, + "logps/chosen": -328.46746826171875, + "logps/rejected": -295.8978576660156, + "loss": 0.6918, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.013683220371603966, + "rewards/margins": 0.0028565835673362017, + "rewards/rejected": -0.01653980277478695, + "step": 440 + }, + { + "epoch": 0.06, + "eval_logits/chosen": -2.7425713539123535, + "eval_logits/rejected": -2.736445426940918, + "eval_logps/chosen": -334.0881652832031, + "eval_logps/rejected": -303.01953125, + "eval_loss": 0.6903461813926697, + "eval_rewards/accuracies": 0.6004999876022339, + "eval_rewards/chosen": -0.013387652114033699, + "eval_rewards/margins": 0.005707699339836836, + "eval_rewards/rejected": -0.019095350056886673, + "eval_runtime": 196.9075, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 440 + }, + { + "epoch": 0.06, + "learning_rate": 2.9411764705882355e-06, + "logits/chosen": -3.083773136138916, + "logits/rejected": -3.0679244995117188, + "logps/chosen": -336.1562194824219, + "logps/rejected": -342.18817138671875, + "loss": 0.6928, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.017543237656354904, + "rewards/margins": 0.0008635501144453883, + "rewards/rejected": -0.018406789749860764, + "step": 450 + }, + { + "epoch": 0.06, + "eval_logits/chosen": -2.7423670291900635, + "eval_logits/rejected": -2.7363510131835938, + "eval_logps/chosen": -334.1518249511719, + "eval_logps/rejected": -303.076416015625, + "eval_loss": 0.6903823018074036, + "eval_rewards/accuracies": 0.6060000061988831, + "eval_rewards/chosen": -0.014023885130882263, + "eval_rewards/margins": 0.00563990930095315, + "eval_rewards/rejected": -0.019663793966174126, + "eval_runtime": 196.8415, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 450 + }, + { + "epoch": 0.06, + "learning_rate": 3.0065359477124182e-06, + "logits/chosen": -3.0067667961120605, + "logits/rejected": -2.99423885345459, + "logps/chosen": -310.811767578125, + "logps/rejected": -287.95330810546875, + "loss": 0.6894, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.014639434404671192, + "rewards/margins": 0.007508780807256699, + "rewards/rejected": -0.022148214280605316, + "step": 460 + }, + { + "epoch": 0.06, + "eval_logits/chosen": -2.742067575454712, + "eval_logits/rejected": -2.736116409301758, + "eval_logps/chosen": -334.324951171875, + "eval_logps/rejected": -303.3060607910156, + "eval_loss": 0.6901097297668457, + "eval_rewards/accuracies": 0.6035000085830688, + "eval_rewards/chosen": -0.01575511507689953, + "eval_rewards/margins": 0.006205403245985508, + "eval_rewards/rejected": -0.021960517391562462, + "eval_runtime": 197.0302, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 460 + }, + { + "epoch": 0.06, + "learning_rate": 3.071895424836602e-06, + "logits/chosen": -3.0282609462738037, + "logits/rejected": -3.040301561355591, + "logps/chosen": -332.8352966308594, + "logps/rejected": -307.71649169921875, + "loss": 0.6894, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.013653397560119629, + "rewards/margins": 0.0075346939265728, + "rewards/rejected": -0.02118808962404728, + "step": 470 + }, + { + "epoch": 0.06, + "eval_logits/chosen": -2.741609811782837, + "eval_logits/rejected": -2.735790967941284, + "eval_logps/chosen": -334.572021484375, + "eval_logps/rejected": -303.6496276855469, + "eval_loss": 0.6896440386772156, + "eval_rewards/accuracies": 0.6019999980926514, + "eval_rewards/chosen": -0.018226245418190956, + "eval_rewards/margins": 0.007169577293097973, + "eval_rewards/rejected": -0.025395819917321205, + "eval_runtime": 197.0911, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 470 + }, + { + "epoch": 0.06, + "learning_rate": 3.1372549019607846e-06, + "logits/chosen": -3.0256853103637695, + "logits/rejected": -2.999748706817627, + "logps/chosen": -343.3094482421875, + "logps/rejected": -283.9912414550781, + "loss": 0.6896, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.017795979976654053, + "rewards/margins": 0.007141630165278912, + "rewards/rejected": -0.02493760921061039, + "step": 480 + }, + { + "epoch": 0.06, + "eval_logits/chosen": -2.7408573627471924, + "eval_logits/rejected": -2.735177755355835, + "eval_logps/chosen": -334.8725280761719, + "eval_logps/rejected": -304.0498962402344, + "eval_loss": 0.6891666054725647, + "eval_rewards/accuracies": 0.6104999780654907, + "eval_rewards/chosen": -0.021231109276413918, + "eval_rewards/margins": 0.00816798210144043, + "eval_rewards/rejected": -0.029399089515209198, + "eval_runtime": 196.9503, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 480 + }, + { + "epoch": 0.06, + "learning_rate": 3.2026143790849674e-06, + "logits/chosen": -3.0702672004699707, + "logits/rejected": -3.0584235191345215, + "logps/chosen": -322.9806823730469, + "logps/rejected": -261.50433349609375, + "loss": 0.6878, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.025627601891756058, + "rewards/margins": 0.010860485024750233, + "rewards/rejected": -0.03648808225989342, + "step": 490 + }, + { + "epoch": 0.06, + "eval_logits/chosen": -2.740217447280884, + "eval_logits/rejected": -2.7347195148468018, + "eval_logps/chosen": -335.2659606933594, + "eval_logps/rejected": -304.5755920410156, + "eval_loss": 0.6885358095169067, + "eval_rewards/accuracies": 0.6079999804496765, + "eval_rewards/chosen": -0.025165580213069916, + "eval_rewards/margins": 0.009489987045526505, + "eval_rewards/rejected": -0.03465556725859642, + "eval_runtime": 197.049, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 490 + }, + { + "epoch": 0.07, + "learning_rate": 3.2679738562091506e-06, + "logits/chosen": -2.9991683959960938, + "logits/rejected": -2.995518922805786, + "logps/chosen": -305.4998474121094, + "logps/rejected": -274.02459716796875, + "loss": 0.6893, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.027515623718500137, + "rewards/margins": 0.00802281778305769, + "rewards/rejected": -0.035538434982299805, + "step": 500 + }, + { + "epoch": 0.07, + "eval_logits/chosen": -2.7397196292877197, + "eval_logits/rejected": -2.734415292739868, + "eval_logps/chosen": -335.75225830078125, + "eval_logps/rejected": -305.1786193847656, + "eval_loss": 0.6879965662956238, + "eval_rewards/accuracies": 0.6144999861717224, + "eval_rewards/chosen": -0.030028536915779114, + "eval_rewards/margins": 0.010657698847353458, + "eval_rewards/rejected": -0.040686242282390594, + "eval_runtime": 197.0059, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 500 + }, + { + "epoch": 0.07, + "learning_rate": 3.3333333333333333e-06, + "logits/chosen": -2.9843194484710693, + "logits/rejected": -2.99456524848938, + "logps/chosen": -302.5033264160156, + "logps/rejected": -304.94403076171875, + "loss": 0.6898, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.03312421962618828, + "rewards/margins": 0.007043605204671621, + "rewards/rejected": -0.040167830884456635, + "step": 510 + }, + { + "epoch": 0.07, + "eval_logits/chosen": -2.7393970489501953, + "eval_logits/rejected": -2.734170436859131, + "eval_logps/chosen": -336.0675048828125, + "eval_logps/rejected": -305.600830078125, + "eval_loss": 0.6874927282333374, + "eval_rewards/accuracies": 0.6169999837875366, + "eval_rewards/chosen": -0.03318093344569206, + "eval_rewards/margins": 0.011727489531040192, + "eval_rewards/rejected": -0.044908422976732254, + "eval_runtime": 197.0533, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 510 + }, + { + "epoch": 0.07, + "learning_rate": 3.398692810457517e-06, + "logits/chosen": -2.9906728267669678, + "logits/rejected": -2.9100711345672607, + "logps/chosen": -305.5321960449219, + "logps/rejected": -309.1561584472656, + "loss": 0.6869, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.03266731649637222, + "rewards/margins": 0.012893171980977058, + "rewards/rejected": -0.04556048661470413, + "step": 520 + }, + { + "epoch": 0.07, + "eval_logits/chosen": -2.7389254570007324, + "eval_logits/rejected": -2.7339720726013184, + "eval_logps/chosen": -336.4316711425781, + "eval_logps/rejected": -306.0887451171875, + "eval_loss": 0.6869123578071594, + "eval_rewards/accuracies": 0.6134999990463257, + "eval_rewards/chosen": -0.03682265803217888, + "eval_rewards/margins": 0.012964564375579357, + "eval_rewards/rejected": -0.04978722333908081, + "eval_runtime": 196.9405, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.078, + "step": 520 + }, + { + "epoch": 0.07, + "learning_rate": 3.4640522875816997e-06, + "logits/chosen": -3.0058653354644775, + "logits/rejected": -3.0059714317321777, + "logps/chosen": -305.9553527832031, + "logps/rejected": -276.55523681640625, + "loss": 0.686, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.04011048376560211, + "rewards/margins": 0.014879104681313038, + "rewards/rejected": -0.054989587515592575, + "step": 530 + }, + { + "epoch": 0.07, + "eval_logits/chosen": -2.7384033203125, + "eval_logits/rejected": -2.733745574951172, + "eval_logps/chosen": -336.9488525390625, + "eval_logps/rejected": -306.7417907714844, + "eval_loss": 0.6862883567810059, + "eval_rewards/accuracies": 0.6150000095367432, + "eval_rewards/chosen": -0.0419941246509552, + "eval_rewards/margins": 0.014323660172522068, + "eval_rewards/rejected": -0.05631778761744499, + "eval_runtime": 197.1253, + "eval_samples_per_second": 10.146, + "eval_steps_per_second": 5.073, + "step": 530 + }, + { + "epoch": 0.07, + "learning_rate": 3.529411764705883e-06, + "logits/chosen": -3.044787883758545, + "logits/rejected": -3.0278046131134033, + "logps/chosen": -347.0563659667969, + "logps/rejected": -308.2681884765625, + "loss": 0.6835, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.040227990597486496, + "rewards/margins": 0.01987212896347046, + "rewards/rejected": -0.060100119560956955, + "step": 540 + }, + { + "epoch": 0.07, + "eval_logits/chosen": -2.738213062286377, + "eval_logits/rejected": -2.7338826656341553, + "eval_logps/chosen": -337.69158935546875, + "eval_logps/rejected": -307.684814453125, + "eval_loss": 0.6853721737861633, + "eval_rewards/accuracies": 0.6115000247955322, + "eval_rewards/chosen": -0.04942203685641289, + "eval_rewards/margins": 0.016325712203979492, + "eval_rewards/rejected": -0.06574775278568268, + "eval_runtime": 196.8498, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 540 + }, + { + "epoch": 0.07, + "learning_rate": 3.5947712418300657e-06, + "logits/chosen": -3.015363931655884, + "logits/rejected": -2.9994001388549805, + "logps/chosen": -309.4760437011719, + "logps/rejected": -280.54541015625, + "loss": 0.6832, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.05051179602742195, + "rewards/margins": 0.020926663652062416, + "rewards/rejected": -0.07143845409154892, + "step": 550 + }, + { + "epoch": 0.07, + "eval_logits/chosen": -2.737447738647461, + "eval_logits/rejected": -2.7334887981414795, + "eval_logps/chosen": -338.6698913574219, + "eval_logps/rejected": -308.8882751464844, + "eval_loss": 0.6843726634979248, + "eval_rewards/accuracies": 0.6115000247955322, + "eval_rewards/chosen": -0.05920499563217163, + "eval_rewards/margins": 0.018577815964818, + "eval_rewards/rejected": -0.07778280973434448, + "eval_runtime": 197.1006, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.074, + "step": 550 + }, + { + "epoch": 0.07, + "learning_rate": 3.6601307189542484e-06, + "logits/chosen": -3.0518956184387207, + "logits/rejected": -3.001509666442871, + "logps/chosen": -364.0932312011719, + "logps/rejected": -352.0817565917969, + "loss": 0.6788, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.062454767525196075, + "rewards/margins": 0.03078216314315796, + "rewards/rejected": -0.09323693811893463, + "step": 560 + }, + { + "epoch": 0.07, + "eval_logits/chosen": -2.736476182937622, + "eval_logits/rejected": -2.7330868244171143, + "eval_logps/chosen": -339.8880615234375, + "eval_logps/rejected": -310.3548889160156, + "eval_loss": 0.6832955479621887, + "eval_rewards/accuracies": 0.6150000095367432, + "eval_rewards/chosen": -0.0713866651058197, + "eval_rewards/margins": 0.021062159910798073, + "eval_rewards/rejected": -0.09244882315397263, + "eval_runtime": 196.7818, + "eval_samples_per_second": 10.164, + "eval_steps_per_second": 5.082, + "step": 560 + }, + { + "epoch": 0.07, + "learning_rate": 3.7254901960784316e-06, + "logits/chosen": -3.0795345306396484, + "logits/rejected": -3.0690414905548096, + "logps/chosen": -329.28558349609375, + "logps/rejected": -307.31707763671875, + "loss": 0.6815, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.0798201709985733, + "rewards/margins": 0.024440856650471687, + "rewards/rejected": -0.10426102578639984, + "step": 570 + }, + { + "epoch": 0.07, + "eval_logits/chosen": -2.7351739406585693, + "eval_logits/rejected": -2.732409954071045, + "eval_logps/chosen": -341.1984558105469, + "eval_logps/rejected": -311.93963623046875, + "eval_loss": 0.6821067929267883, + "eval_rewards/accuracies": 0.6075000166893005, + "eval_rewards/chosen": -0.08449088037014008, + "eval_rewards/margins": 0.02380536124110222, + "eval_rewards/rejected": -0.10829625278711319, + "eval_runtime": 196.9602, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 570 + }, + { + "epoch": 0.08, + "learning_rate": 3.7908496732026144e-06, + "logits/chosen": -3.0217106342315674, + "logits/rejected": -2.97151255607605, + "logps/chosen": -386.537109375, + "logps/rejected": -340.64154052734375, + "loss": 0.678, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.08288892358541489, + "rewards/margins": 0.032978884875774384, + "rewards/rejected": -0.11586780846118927, + "step": 580 + }, + { + "epoch": 0.08, + "eval_logits/chosen": -2.7337937355041504, + "eval_logits/rejected": -2.7314746379852295, + "eval_logps/chosen": -342.7324523925781, + "eval_logps/rejected": -313.7508850097656, + "eval_loss": 0.6809699535369873, + "eval_rewards/accuracies": 0.6069999933242798, + "eval_rewards/chosen": -0.0998305007815361, + "eval_rewards/margins": 0.026577942073345184, + "eval_rewards/rejected": -0.1264084428548813, + "eval_runtime": 196.9667, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 580 + }, + { + "epoch": 0.08, + "learning_rate": 3.856209150326798e-06, + "logits/chosen": -3.018777847290039, + "logits/rejected": -2.9784789085388184, + "logps/chosen": -346.2818298339844, + "logps/rejected": -310.3592529296875, + "loss": 0.6724, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.09781802445650101, + "rewards/margins": 0.0433654822409153, + "rewards/rejected": -0.1411834955215454, + "step": 590 + }, + { + "epoch": 0.08, + "eval_logits/chosen": -2.7325494289398193, + "eval_logits/rejected": -2.7310123443603516, + "eval_logps/chosen": -344.8931579589844, + "eval_logps/rejected": -316.2652587890625, + "eval_loss": 0.6795624494552612, + "eval_rewards/accuracies": 0.5960000157356262, + "eval_rewards/chosen": -0.12143778055906296, + "eval_rewards/margins": 0.030114755034446716, + "eval_rewards/rejected": -0.15155255794525146, + "eval_runtime": 196.8786, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.079, + "step": 590 + }, + { + "epoch": 0.08, + "learning_rate": 3.92156862745098e-06, + "logits/chosen": -2.968508720397949, + "logits/rejected": -2.948955535888672, + "logps/chosen": -309.74383544921875, + "logps/rejected": -293.50933837890625, + "loss": 0.686, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": -0.1398414522409439, + "rewards/margins": 0.01775265857577324, + "rewards/rejected": -0.15759411454200745, + "step": 600 + }, + { + "epoch": 0.08, + "eval_logits/chosen": -2.730149030685425, + "eval_logits/rejected": -2.7294833660125732, + "eval_logps/chosen": -347.0211181640625, + "eval_logps/rejected": -318.7592468261719, + "eval_loss": 0.6780930757522583, + "eval_rewards/accuracies": 0.6000000238418579, + "eval_rewards/chosen": -0.1427169293165207, + "eval_rewards/margins": 0.033775582909584045, + "eval_rewards/rejected": -0.17649252712726593, + "eval_runtime": 197.0711, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.074, + "step": 600 + }, + { + "epoch": 0.08, + "learning_rate": 3.986928104575164e-06, + "logits/chosen": -3.0006091594696045, + "logits/rejected": -2.957949161529541, + "logps/chosen": -296.22698974609375, + "logps/rejected": -255.9561767578125, + "loss": 0.6694, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.12925231456756592, + "rewards/margins": 0.05154203251004219, + "rewards/rejected": -0.18079432845115662, + "step": 610 + }, + { + "epoch": 0.08, + "eval_logits/chosen": -2.7311341762542725, + "eval_logits/rejected": -2.731069564819336, + "eval_logps/chosen": -348.4164733886719, + "eval_logps/rejected": -320.3974304199219, + "eval_loss": 0.677168607711792, + "eval_rewards/accuracies": 0.6025000214576721, + "eval_rewards/chosen": -0.15667042136192322, + "eval_rewards/margins": 0.03620406240224838, + "eval_rewards/rejected": -0.192874476313591, + "eval_runtime": 197.0649, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.074, + "step": 610 + }, + { + "epoch": 0.08, + "learning_rate": 4.052287581699347e-06, + "logits/chosen": -2.987593173980713, + "logits/rejected": -2.9816346168518066, + "logps/chosen": -366.7933044433594, + "logps/rejected": -338.0000915527344, + "loss": 0.6606, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.16044440865516663, + "rewards/margins": 0.07138291746377945, + "rewards/rejected": -0.23182733356952667, + "step": 620 + }, + { + "epoch": 0.08, + "eval_logits/chosen": -2.731865406036377, + "eval_logits/rejected": -2.7336010932922363, + "eval_logps/chosen": -353.2502746582031, + "eval_logps/rejected": -325.7889404296875, + "eval_loss": 0.6752864718437195, + "eval_rewards/accuracies": 0.6079999804496765, + "eval_rewards/chosen": -0.20500893890857697, + "eval_rewards/margins": 0.04177996888756752, + "eval_rewards/rejected": -0.2467889040708542, + "eval_runtime": 197.1487, + "eval_samples_per_second": 10.145, + "eval_steps_per_second": 5.072, + "step": 620 + }, + { + "epoch": 0.08, + "learning_rate": 4.11764705882353e-06, + "logits/chosen": -2.906384229660034, + "logits/rejected": -2.9101319313049316, + "logps/chosen": -342.6524353027344, + "logps/rejected": -319.92333984375, + "loss": 0.668, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.22237953543663025, + "rewards/margins": 0.05616650730371475, + "rewards/rejected": -0.2785460650920868, + "step": 630 + }, + { + "epoch": 0.08, + "eval_logits/chosen": -2.726879358291626, + "eval_logits/rejected": -2.730696678161621, + "eval_logps/chosen": -358.658935546875, + "eval_logps/rejected": -331.7452087402344, + "eval_loss": 0.6736900210380554, + "eval_rewards/accuracies": 0.6019999980926514, + "eval_rewards/chosen": -0.25909480452537537, + "eval_rewards/margins": 0.0472571887075901, + "eval_rewards/rejected": -0.30635198950767517, + "eval_runtime": 196.8854, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 630 + }, + { + "epoch": 0.08, + "learning_rate": 4.183006535947713e-06, + "logits/chosen": -3.0391154289245605, + "logits/rejected": -3.0323967933654785, + "logps/chosen": -351.0347595214844, + "logps/rejected": -331.3139343261719, + "loss": 0.6781, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.28845497965812683, + "rewards/margins": 0.03852443769574165, + "rewards/rejected": -0.3269794285297394, + "step": 640 + }, + { + "epoch": 0.08, + "eval_logits/chosen": -2.716712236404419, + "eval_logits/rejected": -2.722219228744507, + "eval_logps/chosen": -363.30462646484375, + "eval_logps/rejected": -336.9739074707031, + "eval_loss": 0.6720592975616455, + "eval_rewards/accuracies": 0.6075000166893005, + "eval_rewards/chosen": -0.3055519461631775, + "eval_rewards/margins": 0.05308679863810539, + "eval_rewards/rejected": -0.358638733625412, + "eval_runtime": 196.902, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 640 + }, + { + "epoch": 0.09, + "learning_rate": 4.2483660130718954e-06, + "logits/chosen": -2.966703414916992, + "logits/rejected": -2.988459587097168, + "logps/chosen": -348.847900390625, + "logps/rejected": -336.23443603515625, + "loss": 0.6732, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.29965394735336304, + "rewards/margins": 0.05004773288965225, + "rewards/rejected": -0.3497017025947571, + "step": 650 + }, + { + "epoch": 0.09, + "eval_logits/chosen": -2.7116191387176514, + "eval_logits/rejected": -2.7170767784118652, + "eval_logps/chosen": -361.92413330078125, + "eval_logps/rejected": -335.84771728515625, + "eval_loss": 0.6707616448402405, + "eval_rewards/accuracies": 0.609499990940094, + "eval_rewards/chosen": -0.2917468845844269, + "eval_rewards/margins": 0.05562999099493027, + "eval_rewards/rejected": -0.34737691283226013, + "eval_runtime": 196.9115, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.078, + "step": 650 + }, + { + "epoch": 0.09, + "learning_rate": 4.313725490196079e-06, + "logits/chosen": -2.990095615386963, + "logits/rejected": -2.948988914489746, + "logps/chosen": -390.239990234375, + "logps/rejected": -324.51409912109375, + "loss": 0.6768, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.28673607110977173, + "rewards/margins": 0.044372208416461945, + "rewards/rejected": -0.3311082720756531, + "step": 660 + }, + { + "epoch": 0.09, + "eval_logits/chosen": -2.7104814052581787, + "eval_logits/rejected": -2.7157156467437744, + "eval_logps/chosen": -361.3223876953125, + "eval_logps/rejected": -335.4339904785156, + "eval_loss": 0.6699734330177307, + "eval_rewards/accuracies": 0.6129999756813049, + "eval_rewards/chosen": -0.2857293486595154, + "eval_rewards/margins": 0.057510748505592346, + "eval_rewards/rejected": -0.3432401120662689, + "eval_runtime": 196.9573, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 660 + }, + { + "epoch": 0.09, + "learning_rate": 4.379084967320262e-06, + "logits/chosen": -3.0028021335601807, + "logits/rejected": -2.9637579917907715, + "logps/chosen": -373.4584655761719, + "logps/rejected": -390.0169677734375, + "loss": 0.667, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.27895650267601013, + "rewards/margins": 0.06341058760881424, + "rewards/rejected": -0.34236711263656616, + "step": 670 + }, + { + "epoch": 0.09, + "eval_logits/chosen": -2.7081527709960938, + "eval_logits/rejected": -2.7128231525421143, + "eval_logps/chosen": -361.0499572753906, + "eval_logps/rejected": -335.34429931640625, + "eval_loss": 0.6692450642585754, + "eval_rewards/accuracies": 0.6184999942779541, + "eval_rewards/chosen": -0.2830057144165039, + "eval_rewards/margins": 0.05933738872408867, + "eval_rewards/rejected": -0.3423430919647217, + "eval_runtime": 196.8698, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.079, + "step": 670 + }, + { + "epoch": 0.09, + "learning_rate": 4.444444444444444e-06, + "logits/chosen": -3.0348241329193115, + "logits/rejected": -3.0564351081848145, + "logps/chosen": -363.07989501953125, + "logps/rejected": -341.8347473144531, + "loss": 0.6693, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.26485368609428406, + "rewards/margins": 0.05861664563417435, + "rewards/rejected": -0.3234703540802002, + "step": 680 + }, + { + "epoch": 0.09, + "eval_logits/chosen": -2.7053518295288086, + "eval_logits/rejected": -2.7100462913513184, + "eval_logps/chosen": -362.4294128417969, + "eval_logps/rejected": -336.986083984375, + "eval_loss": 0.6684760451316833, + "eval_rewards/accuracies": 0.6144999861717224, + "eval_rewards/chosen": -0.2967996597290039, + "eval_rewards/margins": 0.06196107342839241, + "eval_rewards/rejected": -0.3587607443332672, + "eval_runtime": 196.8194, + "eval_samples_per_second": 10.162, + "eval_steps_per_second": 5.081, + "step": 680 + }, + { + "epoch": 0.09, + "learning_rate": 4.509803921568628e-06, + "logits/chosen": -3.026939630508423, + "logits/rejected": -2.987724781036377, + "logps/chosen": -395.9893493652344, + "logps/rejected": -351.2083740234375, + "loss": 0.6707, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2795209288597107, + "rewards/margins": 0.05711476877331734, + "rewards/rejected": -0.3366357386112213, + "step": 690 + }, + { + "epoch": 0.09, + "eval_logits/chosen": -2.7039105892181396, + "eval_logits/rejected": -2.708899974822998, + "eval_logps/chosen": -363.3128967285156, + "eval_logps/rejected": -338.14306640625, + "eval_loss": 0.6675823926925659, + "eval_rewards/accuracies": 0.6150000095367432, + "eval_rewards/chosen": -0.3056354224681854, + "eval_rewards/margins": 0.06469501554965973, + "eval_rewards/rejected": -0.37033045291900635, + "eval_runtime": 197.1839, + "eval_samples_per_second": 10.143, + "eval_steps_per_second": 5.071, + "step": 690 + }, + { + "epoch": 0.09, + "learning_rate": 4.5751633986928105e-06, + "logits/chosen": -3.018585681915283, + "logits/rejected": -3.023458480834961, + "logps/chosen": -367.96673583984375, + "logps/rejected": -350.847412109375, + "loss": 0.6718, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.29729920625686646, + "rewards/margins": 0.05686334893107414, + "rewards/rejected": -0.3541625738143921, + "step": 700 + }, + { + "epoch": 0.09, + "eval_logits/chosen": -2.7001843452453613, + "eval_logits/rejected": -2.7050814628601074, + "eval_logps/chosen": -362.6632080078125, + "eval_logps/rejected": -337.5663146972656, + "eval_loss": 0.6671297550201416, + "eval_rewards/accuracies": 0.6144999861717224, + "eval_rewards/chosen": -0.29913830757141113, + "eval_rewards/margins": 0.06542481482028961, + "eval_rewards/rejected": -0.36456310749053955, + "eval_runtime": 197.0044, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 700 + }, + { + "epoch": 0.09, + "learning_rate": 4.640522875816994e-06, + "logits/chosen": -3.025627851486206, + "logits/rejected": -3.008409261703491, + "logps/chosen": -374.4230041503906, + "logps/rejected": -355.96136474609375, + "loss": 0.6511, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.29435834288597107, + "rewards/margins": 0.09984922409057617, + "rewards/rejected": -0.39420756697654724, + "step": 710 + }, + { + "epoch": 0.09, + "eval_logits/chosen": -2.6939971446990967, + "eval_logits/rejected": -2.699645519256592, + "eval_logps/chosen": -366.7897644042969, + "eval_logps/rejected": -342.2338562011719, + "eval_loss": 0.6659175157546997, + "eval_rewards/accuracies": 0.6134999990463257, + "eval_rewards/chosen": -0.3404030501842499, + "eval_rewards/margins": 0.07083506137132645, + "eval_rewards/rejected": -0.4112381339073181, + "eval_runtime": 196.9928, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.076, + "step": 710 + }, + { + "epoch": 0.09, + "learning_rate": 4.705882352941177e-06, + "logits/chosen": -2.976383686065674, + "logits/rejected": -2.9598684310913086, + "logps/chosen": -433.13421630859375, + "logps/rejected": -401.3202209472656, + "loss": 0.6685, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.37345483899116516, + "rewards/margins": 0.07345791161060333, + "rewards/rejected": -0.4469127655029297, + "step": 720 + }, + { + "epoch": 0.09, + "eval_logits/chosen": -2.690136194229126, + "eval_logits/rejected": -2.6963469982147217, + "eval_logps/chosen": -369.37225341796875, + "eval_logps/rejected": -345.1917419433594, + "eval_loss": 0.6651197671890259, + "eval_rewards/accuracies": 0.6110000014305115, + "eval_rewards/chosen": -0.36622846126556396, + "eval_rewards/margins": 0.07458891719579697, + "eval_rewards/rejected": -0.44081738591194153, + "eval_runtime": 196.8075, + "eval_samples_per_second": 10.162, + "eval_steps_per_second": 5.081, + "step": 720 + }, + { + "epoch": 0.1, + "learning_rate": 4.77124183006536e-06, + "logits/chosen": -2.9549527168273926, + "logits/rejected": -2.9584693908691406, + "logps/chosen": -390.4580383300781, + "logps/rejected": -374.32147216796875, + "loss": 0.6702, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.3789103031158447, + "rewards/margins": 0.0608968660235405, + "rewards/rejected": -0.43980711698532104, + "step": 730 + }, + { + "epoch": 0.1, + "eval_logits/chosen": -2.6869893074035645, + "eval_logits/rejected": -2.6939523220062256, + "eval_logps/chosen": -371.3726501464844, + "eval_logps/rejected": -347.4822082519531, + "eval_loss": 0.6645473837852478, + "eval_rewards/accuracies": 0.6134999990463257, + "eval_rewards/chosen": -0.38623228669166565, + "eval_rewards/margins": 0.07748986035585403, + "eval_rewards/rejected": -0.4637221693992615, + "eval_runtime": 197.0176, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.076, + "step": 730 + }, + { + "epoch": 0.1, + "learning_rate": 4.836601307189543e-06, + "logits/chosen": -2.9691169261932373, + "logits/rejected": -2.9327917098999023, + "logps/chosen": -390.6695556640625, + "logps/rejected": -331.70257568359375, + "loss": 0.6723, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.34061798453330994, + "rewards/margins": 0.05245697498321533, + "rewards/rejected": -0.3930749297142029, + "step": 740 + }, + { + "epoch": 0.1, + "eval_logits/chosen": -2.688300371170044, + "eval_logits/rejected": -2.695115327835083, + "eval_logps/chosen": -369.962646484375, + "eval_logps/rejected": -346.0401916503906, + "eval_loss": 0.6643568277359009, + "eval_rewards/accuracies": 0.6144999861717224, + "eval_rewards/chosen": -0.3721325099468231, + "eval_rewards/margins": 0.07716938108205795, + "eval_rewards/rejected": -0.44930192828178406, + "eval_runtime": 196.9446, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.078, + "step": 740 + }, + { + "epoch": 0.1, + "learning_rate": 4.901960784313726e-06, + "logits/chosen": -2.872075319290161, + "logits/rejected": -2.8731420040130615, + "logps/chosen": -331.8930969238281, + "logps/rejected": -321.7856140136719, + "loss": 0.6484, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.34029215574264526, + "rewards/margins": 0.10654574632644653, + "rewards/rejected": -0.4468379020690918, + "step": 750 + }, + { + "epoch": 0.1, + "eval_logits/chosen": -2.6878302097320557, + "eval_logits/rejected": -2.695065975189209, + "eval_logps/chosen": -372.1144104003906, + "eval_logps/rejected": -348.4941711425781, + "eval_loss": 0.6637778878211975, + "eval_rewards/accuracies": 0.6110000014305115, + "eval_rewards/chosen": -0.3936500549316406, + "eval_rewards/margins": 0.0801912397146225, + "eval_rewards/rejected": -0.47384127974510193, + "eval_runtime": 197.1868, + "eval_samples_per_second": 10.143, + "eval_steps_per_second": 5.071, + "step": 750 + }, + { + "epoch": 0.1, + "learning_rate": 4.967320261437909e-06, + "logits/chosen": -2.959843158721924, + "logits/rejected": -2.97227144241333, + "logps/chosen": -369.81719970703125, + "logps/rejected": -319.16412353515625, + "loss": 0.6701, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.360421359539032, + "rewards/margins": 0.057536423206329346, + "rewards/rejected": -0.41795778274536133, + "step": 760 + }, + { + "epoch": 0.1, + "eval_logits/chosen": -2.688547134399414, + "eval_logits/rejected": -2.695556879043579, + "eval_logps/chosen": -370.23651123046875, + "eval_logps/rejected": -346.562744140625, + "eval_loss": 0.6633652448654175, + "eval_rewards/accuracies": 0.6159999966621399, + "eval_rewards/chosen": -0.37487098574638367, + "eval_rewards/margins": 0.079656220972538, + "eval_rewards/rejected": -0.45452719926834106, + "eval_runtime": 197.0589, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 760 + }, + { + "epoch": 0.1, + "learning_rate": 4.999993476542427e-06, + "logits/chosen": -2.996096134185791, + "logits/rejected": -2.9806487560272217, + "logps/chosen": -382.1415100097656, + "logps/rejected": -356.4634704589844, + "loss": 0.6578, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.33577603101730347, + "rewards/margins": 0.09010833501815796, + "rewards/rejected": -0.4258843958377838, + "step": 770 + }, + { + "epoch": 0.1, + "eval_logits/chosen": -2.6854639053344727, + "eval_logits/rejected": -2.6923415660858154, + "eval_logps/chosen": -372.1638488769531, + "eval_logps/rejected": -348.6732482910156, + "eval_loss": 0.663102924823761, + "eval_rewards/accuracies": 0.6140000224113464, + "eval_rewards/chosen": -0.3941444158554077, + "eval_rewards/margins": 0.08148758113384247, + "eval_rewards/rejected": -0.4756320118904114, + "eval_runtime": 197.3159, + "eval_samples_per_second": 10.136, + "eval_steps_per_second": 5.068, + "step": 770 + }, + { + "epoch": 0.1, + "learning_rate": 4.999941289086112e-06, + "logits/chosen": -2.9802470207214355, + "logits/rejected": -2.9475085735321045, + "logps/chosen": -388.72674560546875, + "logps/rejected": -349.171875, + "loss": 0.6555, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.3842027187347412, + "rewards/margins": 0.10800528526306152, + "rewards/rejected": -0.49220794439315796, + "step": 780 + }, + { + "epoch": 0.1, + "eval_logits/chosen": -2.6853272914886475, + "eval_logits/rejected": -2.6920483112335205, + "eval_logps/chosen": -372.9538269042969, + "eval_logps/rejected": -349.61456298828125, + "eval_loss": 0.6627827882766724, + "eval_rewards/accuracies": 0.6169999837875366, + "eval_rewards/chosen": -0.40204355120658875, + "eval_rewards/margins": 0.08300190418958664, + "eval_rewards/rejected": -0.48504549264907837, + "eval_runtime": 197.0362, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 780 + }, + { + "epoch": 0.1, + "learning_rate": 4.999836915262896e-06, + "logits/chosen": -2.9006078243255615, + "logits/rejected": -2.9233975410461426, + "logps/chosen": -375.62646484375, + "logps/rejected": -387.4599609375, + "loss": 0.6354, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.3945903480052948, + "rewards/margins": 0.14294961094856262, + "rewards/rejected": -0.5375399589538574, + "step": 790 + }, + { + "epoch": 0.1, + "eval_logits/chosen": -2.6717944145202637, + "eval_logits/rejected": -2.679598331451416, + "eval_logps/chosen": -378.26531982421875, + "eval_logps/rejected": -355.6182556152344, + "eval_loss": 0.6618691682815552, + "eval_rewards/accuracies": 0.609499990940094, + "eval_rewards/chosen": -0.45515894889831543, + "eval_rewards/margins": 0.08992352336645126, + "eval_rewards/rejected": -0.5450823903083801, + "eval_runtime": 197.0915, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 790 + }, + { + "epoch": 0.1, + "learning_rate": 4.999680357251587e-06, + "logits/chosen": -2.7958996295928955, + "logits/rejected": -2.850475788116455, + "logps/chosen": -355.81787109375, + "logps/rejected": -378.65435791015625, + "loss": 0.643, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.45146241784095764, + "rewards/margins": 0.12599804997444153, + "rewards/rejected": -0.577460527420044, + "step": 800 + }, + { + "epoch": 0.1, + "eval_logits/chosen": -2.656829595565796, + "eval_logits/rejected": -2.6665048599243164, + "eval_logps/chosen": -383.33489990234375, + "eval_logps/rejected": -361.54486083984375, + "eval_loss": 0.6610292196273804, + "eval_rewards/accuracies": 0.609499990940094, + "eval_rewards/chosen": -0.5058547854423523, + "eval_rewards/margins": 0.09849373996257782, + "eval_rewards/rejected": -0.6043485999107361, + "eval_runtime": 197.0677, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.074, + "step": 800 + }, + { + "epoch": 0.11, + "learning_rate": 4.999471618320339e-06, + "logits/chosen": -2.8944575786590576, + "logits/rejected": -2.9122395515441895, + "logps/chosen": -402.0007019042969, + "logps/rejected": -359.7335510253906, + "loss": 0.6762, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.4933040142059326, + "rewards/margins": 0.05890519544482231, + "rewards/rejected": -0.5522092580795288, + "step": 810 + }, + { + "epoch": 0.11, + "eval_logits/chosen": -2.650148868560791, + "eval_logits/rejected": -2.6608121395111084, + "eval_logps/chosen": -381.2932434082031, + "eval_logps/rejected": -359.6947937011719, + "eval_loss": 0.660219669342041, + "eval_rewards/accuracies": 0.6110000014305115, + "eval_rewards/chosen": -0.48543816804885864, + "eval_rewards/margins": 0.10040930658578873, + "eval_rewards/rejected": -0.5858475565910339, + "eval_runtime": 197.1031, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.073, + "step": 810 + }, + { + "epoch": 0.11, + "learning_rate": 4.999210702826586e-06, + "logits/chosen": -3.0301737785339355, + "logits/rejected": -3.028296709060669, + "logps/chosen": -423.05059814453125, + "logps/rejected": -380.2186279296875, + "loss": 0.6559, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.47225722670555115, + "rewards/margins": 0.10403690487146378, + "rewards/rejected": -0.5762940645217896, + "step": 820 + }, + { + "epoch": 0.11, + "eval_logits/chosen": -2.6431291103363037, + "eval_logits/rejected": -2.6536731719970703, + "eval_logps/chosen": -380.1671447753906, + "eval_logps/rejected": -358.74859619140625, + "eval_loss": 0.6598737835884094, + "eval_rewards/accuracies": 0.6104999780654907, + "eval_rewards/chosen": -0.47417721152305603, + "eval_rewards/margins": 0.10220862179994583, + "eval_rewards/rejected": -0.5763858556747437, + "eval_runtime": 197.3715, + "eval_samples_per_second": 10.133, + "eval_steps_per_second": 5.067, + "step": 820 + }, + { + "epoch": 0.11, + "learning_rate": 4.998897616216947e-06, + "logits/chosen": -2.8779802322387695, + "logits/rejected": -2.903449535369873, + "logps/chosen": -321.4455261230469, + "logps/rejected": -371.50054931640625, + "loss": 0.638, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.4697656035423279, + "rewards/margins": 0.14891940355300903, + "rewards/rejected": -0.6186850070953369, + "step": 830 + }, + { + "epoch": 0.11, + "eval_logits/chosen": -2.624596118927002, + "eval_logits/rejected": -2.6360020637512207, + "eval_logps/chosen": -389.80560302734375, + "eval_logps/rejected": -369.4499816894531, + "eval_loss": 0.6608967185020447, + "eval_rewards/accuracies": 0.6039999723434448, + "eval_rewards/chosen": -0.5705617666244507, + "eval_rewards/margins": 0.11283760517835617, + "eval_rewards/rejected": -0.6833993792533875, + "eval_runtime": 197.2478, + "eval_samples_per_second": 10.14, + "eval_steps_per_second": 5.07, + "step": 830 + }, + { + "epoch": 0.11, + "learning_rate": 4.998532365027117e-06, + "logits/chosen": -2.783334970474243, + "logits/rejected": -2.809696674346924, + "logps/chosen": -391.5068054199219, + "logps/rejected": -329.7892761230469, + "loss": 0.6485, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.5235930681228638, + "rewards/margins": 0.12364151328802109, + "rewards/rejected": -0.6472345590591431, + "step": 840 + }, + { + "epoch": 0.11, + "eval_logits/chosen": -2.616718292236328, + "eval_logits/rejected": -2.6274757385253906, + "eval_logps/chosen": -393.76824951171875, + "eval_logps/rejected": -373.88800048828125, + "eval_loss": 0.6622524261474609, + "eval_rewards/accuracies": 0.6044999957084656, + "eval_rewards/chosen": -0.6101884841918945, + "eval_rewards/margins": 0.11759106814861298, + "eval_rewards/rejected": -0.7277796268463135, + "eval_runtime": 197.2914, + "eval_samples_per_second": 10.137, + "eval_steps_per_second": 5.069, + "step": 840 + }, + { + "epoch": 0.11, + "learning_rate": 4.9981149568817275e-06, + "logits/chosen": -2.8706066608428955, + "logits/rejected": -2.874828577041626, + "logps/chosen": -396.6725158691406, + "logps/rejected": -420.00732421875, + "loss": 0.6393, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.529072105884552, + "rewards/margins": 0.15510477125644684, + "rewards/rejected": -0.6841768026351929, + "step": 850 + }, + { + "epoch": 0.11, + "eval_logits/chosen": -2.6060431003570557, + "eval_logits/rejected": -2.616872549057007, + "eval_logps/chosen": -398.9680480957031, + "eval_logps/rejected": -379.9243469238281, + "eval_loss": 0.664020299911499, + "eval_rewards/accuracies": 0.6054999828338623, + "eval_rewards/chosen": -0.6621867418289185, + "eval_rewards/margins": 0.1259564757347107, + "eval_rewards/rejected": -0.7881432771682739, + "eval_runtime": 196.8874, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 850 + }, + { + "epoch": 0.11, + "learning_rate": 4.997645400494192e-06, + "logits/chosen": -2.8616645336151123, + "logits/rejected": -2.839806079864502, + "logps/chosen": -367.6358947753906, + "logps/rejected": -367.6234436035156, + "loss": 0.6465, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.676906943321228, + "rewards/margins": 0.1602444052696228, + "rewards/rejected": -0.8371513485908508, + "step": 860 + }, + { + "epoch": 0.11, + "eval_logits/chosen": -2.5956826210021973, + "eval_logits/rejected": -2.6065311431884766, + "eval_logps/chosen": -405.5986633300781, + "eval_logps/rejected": -387.4784240722656, + "eval_loss": 0.6669895052909851, + "eval_rewards/accuracies": 0.6069999933242798, + "eval_rewards/chosen": -0.7284926772117615, + "eval_rewards/margins": 0.13519158959388733, + "eval_rewards/rejected": -0.8636841773986816, + "eval_runtime": 196.8502, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 860 + }, + { + "epoch": 0.11, + "learning_rate": 4.997123705666514e-06, + "logits/chosen": -2.844677448272705, + "logits/rejected": -2.8251328468322754, + "logps/chosen": -411.3539123535156, + "logps/rejected": -404.601806640625, + "loss": 0.6604, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.6887822151184082, + "rewards/margins": 0.11688725650310516, + "rewards/rejected": -0.8056694865226746, + "step": 870 + }, + { + "epoch": 0.11, + "eval_logits/chosen": -2.6024270057678223, + "eval_logits/rejected": -2.6138432025909424, + "eval_logps/chosen": -401.89031982421875, + "eval_logps/rejected": -383.3900146484375, + "eval_loss": 0.664444088935852, + "eval_rewards/accuracies": 0.6060000061988831, + "eval_rewards/chosen": -0.6914088129997253, + "eval_rewards/margins": 0.13139095902442932, + "eval_rewards/rejected": -0.8227998614311218, + "eval_runtime": 196.9191, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 870 + }, + { + "epoch": 0.12, + "learning_rate": 4.996549883289093e-06, + "logits/chosen": -2.82551646232605, + "logits/rejected": -2.7892441749572754, + "logps/chosen": -384.5074157714844, + "logps/rejected": -408.62579345703125, + "loss": 0.6875, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.8276923894882202, + "rewards/margins": 0.09274474531412125, + "rewards/rejected": -0.9204371571540833, + "step": 880 + }, + { + "epoch": 0.12, + "eval_logits/chosen": -2.5992438793182373, + "eval_logits/rejected": -2.6100361347198486, + "eval_logps/chosen": -407.5960998535156, + "eval_logps/rejected": -389.8180847167969, + "eval_loss": 0.666872501373291, + "eval_rewards/accuracies": 0.6014999747276306, + "eval_rewards/chosen": -0.7484666705131531, + "eval_rewards/margins": 0.13861419260501862, + "eval_rewards/rejected": -0.8870808482170105, + "eval_runtime": 197.1631, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 880 + }, + { + "epoch": 0.12, + "learning_rate": 4.995923945340495e-06, + "logits/chosen": -2.87914776802063, + "logits/rejected": -2.8681235313415527, + "logps/chosen": -388.1961364746094, + "logps/rejected": -399.79119873046875, + "loss": 0.6721, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.7051995992660522, + "rewards/margins": 0.10840250551700592, + "rewards/rejected": -0.8136021494865417, + "step": 890 + }, + { + "epoch": 0.12, + "eval_logits/chosen": -2.610374927520752, + "eval_logits/rejected": -2.620400905609131, + "eval_logps/chosen": -401.38885498046875, + "eval_logps/rejected": -383.13677978515625, + "eval_loss": 0.6637265682220459, + "eval_rewards/accuracies": 0.6060000061988831, + "eval_rewards/chosen": -0.6863947510719299, + "eval_rewards/margins": 0.13387317955493927, + "eval_rewards/rejected": -0.8202678561210632, + "eval_runtime": 196.9446, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.078, + "step": 890 + }, + { + "epoch": 0.12, + "learning_rate": 4.995245904887195e-06, + "logits/chosen": -2.8773951530456543, + "logits/rejected": -2.871093273162842, + "logps/chosen": -376.4679870605469, + "logps/rejected": -338.0958251953125, + "loss": 0.7118, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.7589614391326904, + "rewards/margins": 0.04700089246034622, + "rewards/rejected": -0.8059623837471008, + "step": 900 + }, + { + "epoch": 0.12, + "eval_logits/chosen": -2.6222400665283203, + "eval_logits/rejected": -2.6316025257110596, + "eval_logps/chosen": -391.1489562988281, + "eval_logps/rejected": -371.9417724609375, + "eval_loss": 0.6598663330078125, + "eval_rewards/accuracies": 0.6019999980926514, + "eval_rewards/chosen": -0.5839956998825073, + "eval_rewards/margins": 0.12432169914245605, + "eval_rewards/rejected": -0.7083174586296082, + "eval_runtime": 197.1091, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.073, + "step": 900 + }, + { + "epoch": 0.12, + "learning_rate": 4.994515776083313e-06, + "logits/chosen": -2.8134074211120605, + "logits/rejected": -2.856207847595215, + "logps/chosen": -391.5662536621094, + "logps/rejected": -442.9309997558594, + "loss": 0.612, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.5585755109786987, + "rewards/margins": 0.2331864833831787, + "rewards/rejected": -0.7917619347572327, + "step": 910 + }, + { + "epoch": 0.12, + "eval_logits/chosen": -2.6121630668640137, + "eval_logits/rejected": -2.621626853942871, + "eval_logps/chosen": -394.4494323730469, + "eval_logps/rejected": -375.6937255859375, + "eval_loss": 0.6611830592155457, + "eval_rewards/accuracies": 0.6100000143051147, + "eval_rewards/chosen": -0.6170003414154053, + "eval_rewards/margins": 0.12883655726909637, + "eval_rewards/rejected": -0.7458369731903076, + "eval_runtime": 197.2098, + "eval_samples_per_second": 10.141, + "eval_steps_per_second": 5.071, + "step": 910 + }, + { + "epoch": 0.12, + "learning_rate": 4.993733574170316e-06, + "logits/chosen": -2.858757972717285, + "logits/rejected": -2.8651883602142334, + "logps/chosen": -346.06536865234375, + "logps/rejected": -341.8927917480469, + "loss": 0.6676, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.5705429911613464, + "rewards/margins": 0.13456781208515167, + "rewards/rejected": -0.7051107883453369, + "step": 920 + }, + { + "epoch": 0.12, + "eval_logits/chosen": -2.609501361846924, + "eval_logits/rejected": -2.619729995727539, + "eval_logps/chosen": -394.84228515625, + "eval_logps/rejected": -376.29443359375, + "eval_loss": 0.6611314415931702, + "eval_rewards/accuracies": 0.609000027179718, + "eval_rewards/chosen": -0.6209287643432617, + "eval_rewards/margins": 0.1309155523777008, + "eval_rewards/rejected": -0.7518444061279297, + "eval_runtime": 196.9861, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 920 + }, + { + "epoch": 0.12, + "learning_rate": 4.992899315476696e-06, + "logits/chosen": -2.884894371032715, + "logits/rejected": -2.8854660987854004, + "logps/chosen": -450.76397705078125, + "logps/rejected": -413.5362243652344, + "loss": 0.6577, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.6512743830680847, + "rewards/margins": 0.13802878558635712, + "rewards/rejected": -0.789303183555603, + "step": 930 + }, + { + "epoch": 0.12, + "eval_logits/chosen": -2.6049296855926514, + "eval_logits/rejected": -2.614792585372925, + "eval_logps/chosen": -397.35797119140625, + "eval_logps/rejected": -379.2372741699219, + "eval_loss": 0.6619851589202881, + "eval_rewards/accuracies": 0.609000027179718, + "eval_rewards/chosen": -0.6460856199264526, + "eval_rewards/margins": 0.135187029838562, + "eval_rewards/rejected": -0.7812727093696594, + "eval_runtime": 197.0738, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 930 + }, + { + "epoch": 0.12, + "learning_rate": 4.9920130174176354e-06, + "logits/chosen": -2.8599836826324463, + "logits/rejected": -2.8363242149353027, + "logps/chosen": -408.45501708984375, + "logps/rejected": -397.5457763671875, + "loss": 0.637, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.6751912832260132, + "rewards/margins": 0.18209555745124817, + "rewards/rejected": -0.8572869300842285, + "step": 940 + }, + { + "epoch": 0.12, + "eval_logits/chosen": -2.6164982318878174, + "eval_logits/rejected": -2.6263084411621094, + "eval_logps/chosen": -390.9404602050781, + "eval_logps/rejected": -372.2401428222656, + "eval_loss": 0.659035325050354, + "eval_rewards/accuracies": 0.609499990940094, + "eval_rewards/chosen": -0.5819105505943298, + "eval_rewards/margins": 0.1293908655643463, + "eval_rewards/rejected": -0.7113014459609985, + "eval_runtime": 197.105, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.073, + "step": 940 + }, + { + "epoch": 0.12, + "learning_rate": 4.991074698494638e-06, + "logits/chosen": -2.910370349884033, + "logits/rejected": -2.889981746673584, + "logps/chosen": -395.23870849609375, + "logps/rejected": -352.4375915527344, + "loss": 0.6663, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.5276027917861938, + "rewards/margins": 0.09363868087530136, + "rewards/rejected": -0.621241569519043, + "step": 950 + }, + { + "epoch": 0.12, + "eval_logits/chosen": -2.623030185699463, + "eval_logits/rejected": -2.6329829692840576, + "eval_logps/chosen": -385.2651062011719, + "eval_logps/rejected": -366.1216125488281, + "eval_loss": 0.6571491956710815, + "eval_rewards/accuracies": 0.6115000247955322, + "eval_rewards/chosen": -0.5251567959785461, + "eval_rewards/margins": 0.12495911866426468, + "eval_rewards/rejected": -0.6501159071922302, + "eval_runtime": 197.0134, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 950 + }, + { + "epoch": 0.13, + "learning_rate": 4.990084378295148e-06, + "logits/chosen": -2.9056191444396973, + "logits/rejected": -2.914172410964966, + "logps/chosen": -356.25457763671875, + "logps/rejected": -331.71575927734375, + "loss": 0.6437, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.48699110746383667, + "rewards/margins": 0.14344017207622528, + "rewards/rejected": -0.6304312944412231, + "step": 960 + }, + { + "epoch": 0.13, + "eval_logits/chosen": -2.6210577487945557, + "eval_logits/rejected": -2.631321907043457, + "eval_logps/chosen": -384.9283752441406, + "eval_logps/rejected": -365.94134521484375, + "eval_loss": 0.6564494967460632, + "eval_rewards/accuracies": 0.6140000224113464, + "eval_rewards/chosen": -0.5217894315719604, + "eval_rewards/margins": 0.1265236735343933, + "eval_rewards/rejected": -0.6483131051063538, + "eval_runtime": 197.16, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 960 + }, + { + "epoch": 0.13, + "learning_rate": 4.989042077492135e-06, + "logits/chosen": -2.8806536197662354, + "logits/rejected": -2.8581955432891846, + "logps/chosen": -384.424560546875, + "logps/rejected": -377.8148498535156, + "loss": 0.6053, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.45277565717697144, + "rewards/margins": 0.22111694514751434, + "rewards/rejected": -0.6738926768302917, + "step": 970 + }, + { + "epoch": 0.13, + "eval_logits/chosen": -2.6111137866973877, + "eval_logits/rejected": -2.62229585647583, + "eval_logps/chosen": -389.8968811035156, + "eval_logps/rejected": -371.7264404296875, + "eval_loss": 0.6566739678382874, + "eval_rewards/accuracies": 0.6179999709129333, + "eval_rewards/chosen": -0.5714748501777649, + "eval_rewards/margins": 0.1346893310546875, + "eval_rewards/rejected": -0.7061640620231628, + "eval_runtime": 197.1264, + "eval_samples_per_second": 10.146, + "eval_steps_per_second": 5.073, + "step": 970 + }, + { + "epoch": 0.13, + "learning_rate": 4.987947817843665e-06, + "logits/chosen": -2.7882161140441895, + "logits/rejected": -2.828187942504883, + "logps/chosen": -369.6874084472656, + "logps/rejected": -356.955810546875, + "loss": 0.63, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.6042593717575073, + "rewards/margins": 0.21899476647377014, + "rewards/rejected": -0.8232541084289551, + "step": 980 + }, + { + "epoch": 0.13, + "eval_logits/chosen": -2.5871171951293945, + "eval_logits/rejected": -2.59865403175354, + "eval_logps/chosen": -404.0250244140625, + "eval_logps/rejected": -387.54180908203125, + "eval_loss": 0.6614810824394226, + "eval_rewards/accuracies": 0.612500011920929, + "eval_rewards/chosen": -0.7127563953399658, + "eval_rewards/margins": 0.15156131982803345, + "eval_rewards/rejected": -0.8643176555633545, + "eval_runtime": 197.0053, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 980 + }, + { + "epoch": 0.13, + "learning_rate": 4.986801622192453e-06, + "logits/chosen": -2.840859889984131, + "logits/rejected": -2.831991672515869, + "logps/chosen": -351.64703369140625, + "logps/rejected": -344.9053649902344, + "loss": 0.6434, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.7253022193908691, + "rewards/margins": 0.21113955974578857, + "rewards/rejected": -0.9364417195320129, + "step": 990 + }, + { + "epoch": 0.13, + "eval_logits/chosen": -2.580679416656494, + "eval_logits/rejected": -2.5909790992736816, + "eval_logps/chosen": -407.9801330566406, + "eval_logps/rejected": -392.0733337402344, + "eval_loss": 0.6637634634971619, + "eval_rewards/accuracies": 0.6075000166893005, + "eval_rewards/chosen": -0.7523072361946106, + "eval_rewards/margins": 0.15732604265213013, + "eval_rewards/rejected": -0.9096333384513855, + "eval_runtime": 196.9793, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 990 + }, + { + "epoch": 0.13, + "learning_rate": 4.985603514465372e-06, + "logits/chosen": -2.8628830909729004, + "logits/rejected": -2.8967411518096924, + "logps/chosen": -390.51971435546875, + "logps/rejected": -410.86322021484375, + "loss": 0.6192, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.6669122576713562, + "rewards/margins": 0.27157723903656006, + "rewards/rejected": -0.938489556312561, + "step": 1000 + }, + { + "epoch": 0.13, + "eval_logits/chosen": -2.580045700073242, + "eval_logits/rejected": -2.589877128601074, + "eval_logps/chosen": -409.75250244140625, + "eval_logps/rejected": -394.31695556640625, + "eval_loss": 0.6645926237106323, + "eval_rewards/accuracies": 0.6079999804496765, + "eval_rewards/chosen": -0.7700310945510864, + "eval_rewards/margins": 0.1620384156703949, + "eval_rewards/rejected": -0.9320694208145142, + "eval_runtime": 196.9612, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 1000 + }, + { + "epoch": 0.13, + "learning_rate": 4.984353519672966e-06, + "logits/chosen": -2.780689001083374, + "logits/rejected": -2.805438280105591, + "logps/chosen": -399.078857421875, + "logps/rejected": -371.93798828125, + "loss": 0.6942, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.7688107490539551, + "rewards/margins": 0.07026199996471405, + "rewards/rejected": -0.8390728235244751, + "step": 1010 + }, + { + "epoch": 0.13, + "eval_logits/chosen": -2.5973100662231445, + "eval_logits/rejected": -2.6079728603363037, + "eval_logps/chosen": -397.49072265625, + "eval_logps/rejected": -380.68865966796875, + "eval_loss": 0.6575655341148376, + "eval_rewards/accuracies": 0.6144999861717224, + "eval_rewards/chosen": -0.6474130153656006, + "eval_rewards/margins": 0.14837341010570526, + "eval_rewards/rejected": -0.7957863807678223, + "eval_runtime": 197.0561, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 1010 + }, + { + "epoch": 0.13, + "learning_rate": 4.9830516639089226e-06, + "logits/chosen": -2.8402628898620605, + "logits/rejected": -2.847748279571533, + "logps/chosen": -434.74786376953125, + "logps/rejected": -364.552978515625, + "loss": 0.6408, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.6251915097236633, + "rewards/margins": 0.16107437014579773, + "rewards/rejected": -0.7862659692764282, + "step": 1020 + }, + { + "epoch": 0.13, + "eval_logits/chosen": -2.5868897438049316, + "eval_logits/rejected": -2.597947359085083, + "eval_logps/chosen": -401.7997741699219, + "eval_logps/rejected": -385.65380859375, + "eval_loss": 0.6590712666511536, + "eval_rewards/accuracies": 0.6150000095367432, + "eval_rewards/chosen": -0.6905036568641663, + "eval_rewards/margins": 0.15493395924568176, + "eval_rewards/rejected": -0.8454375863075256, + "eval_runtime": 196.979, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 1020 + }, + { + "epoch": 0.13, + "learning_rate": 4.9816979743495296e-06, + "logits/chosen": -2.864896774291992, + "logits/rejected": -2.867267608642578, + "logps/chosen": -451.87939453125, + "logps/rejected": -427.56402587890625, + "loss": 0.6204, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.7102250456809998, + "rewards/margins": 0.23890939354896545, + "rewards/rejected": -0.9491344690322876, + "step": 1030 + }, + { + "epoch": 0.13, + "eval_logits/chosen": -2.5824921131134033, + "eval_logits/rejected": -2.593012809753418, + "eval_logps/chosen": -404.1101379394531, + "eval_logps/rejected": -388.3902587890625, + "eval_loss": 0.6607739329338074, + "eval_rewards/accuracies": 0.6184999942779541, + "eval_rewards/chosen": -0.7136072516441345, + "eval_rewards/margins": 0.1591949313879013, + "eval_rewards/rejected": -0.872802197933197, + "eval_runtime": 196.8691, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.08, + "step": 1030 + }, + { + "epoch": 0.14, + "learning_rate": 4.980292479253105e-06, + "logits/chosen": -2.8844125270843506, + "logits/rejected": -2.890221118927002, + "logps/chosen": -439.6178283691406, + "logps/rejected": -408.71136474609375, + "loss": 0.5986, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6904081106185913, + "rewards/margins": 0.2821322977542877, + "rewards/rejected": -0.9725404977798462, + "step": 1040 + }, + { + "epoch": 0.14, + "eval_logits/chosen": -2.5676214694976807, + "eval_logits/rejected": -2.5784456729888916, + "eval_logps/chosen": -413.231201171875, + "eval_logps/rejected": -398.8504638671875, + "eval_loss": 0.6670145392417908, + "eval_rewards/accuracies": 0.612500011920929, + "eval_rewards/chosen": -0.8048175573348999, + "eval_rewards/margins": 0.17258678376674652, + "eval_rewards/rejected": -0.9774044156074524, + "eval_runtime": 196.6812, + "eval_samples_per_second": 10.169, + "eval_steps_per_second": 5.084, + "step": 1040 + }, + { + "epoch": 0.14, + "learning_rate": 4.978835207959414e-06, + "logits/chosen": -2.8102452754974365, + "logits/rejected": -2.813763380050659, + "logps/chosen": -398.92425537109375, + "logps/rejected": -385.8040771484375, + "loss": 0.6624, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.7675672769546509, + "rewards/margins": 0.1677415817975998, + "rewards/rejected": -0.9353087544441223, + "step": 1050 + }, + { + "epoch": 0.14, + "eval_logits/chosen": -2.5734024047851562, + "eval_logits/rejected": -2.5844082832336426, + "eval_logps/chosen": -411.4896240234375, + "eval_logps/rejected": -397.12872314453125, + "eval_loss": 0.6648815870285034, + "eval_rewards/accuracies": 0.6150000095367432, + "eval_rewards/chosen": -0.7874022126197815, + "eval_rewards/margins": 0.17278487980365753, + "eval_rewards/rejected": -0.9601870775222778, + "eval_runtime": 197.1774, + "eval_samples_per_second": 10.143, + "eval_steps_per_second": 5.072, + "step": 1050 + }, + { + "epoch": 0.14, + "learning_rate": 4.977326190889046e-06, + "logits/chosen": -2.831808090209961, + "logits/rejected": -2.7664592266082764, + "logps/chosen": -405.8113708496094, + "logps/rejected": -346.0255432128906, + "loss": 0.6528, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.664142906665802, + "rewards/margins": 0.14156608283519745, + "rewards/rejected": -0.8057088851928711, + "step": 1060 + }, + { + "epoch": 0.14, + "eval_logits/chosen": -2.5949344635009766, + "eval_logits/rejected": -2.606123685836792, + "eval_logps/chosen": -399.26690673828125, + "eval_logps/rejected": -383.4966735839844, + "eval_loss": 0.6575686931610107, + "eval_rewards/accuracies": 0.6169999837875366, + "eval_rewards/chosen": -0.6651748418807983, + "eval_rewards/margins": 0.1586921513080597, + "eval_rewards/rejected": -0.8238670825958252, + "eval_runtime": 196.9062, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 1060 + }, + { + "epoch": 0.14, + "learning_rate": 4.975765459542788e-06, + "logits/chosen": -2.8009772300720215, + "logits/rejected": -2.8274002075195312, + "logps/chosen": -376.3597106933594, + "logps/rejected": -378.44921875, + "loss": 0.6332, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.5817060470581055, + "rewards/margins": 0.20408394932746887, + "rewards/rejected": -0.7857899069786072, + "step": 1070 + }, + { + "epoch": 0.14, + "eval_logits/chosen": -2.5985264778137207, + "eval_logits/rejected": -2.610111951828003, + "eval_logps/chosen": -396.3663635253906, + "eval_logps/rejected": -380.4352722167969, + "eval_loss": 0.656349778175354, + "eval_rewards/accuracies": 0.6175000071525574, + "eval_rewards/chosen": -0.6361696124076843, + "eval_rewards/margins": 0.15708313882350922, + "eval_rewards/rejected": -0.7932528257369995, + "eval_runtime": 196.8426, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 1070 + }, + { + "epoch": 0.14, + "learning_rate": 4.9741530465009665e-06, + "logits/chosen": -2.767240285873413, + "logits/rejected": -2.743711471557617, + "logps/chosen": -362.4321594238281, + "logps/rejected": -348.2496032714844, + "loss": 0.6364, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.556038498878479, + "rewards/margins": 0.1609477698802948, + "rewards/rejected": -0.7169862985610962, + "step": 1080 + }, + { + "epoch": 0.14, + "eval_logits/chosen": -2.5956056118011475, + "eval_logits/rejected": -2.607423782348633, + "eval_logps/chosen": -397.013916015625, + "eval_logps/rejected": -381.3038635253906, + "eval_loss": 0.6564236879348755, + "eval_rewards/accuracies": 0.6140000224113464, + "eval_rewards/chosen": -0.6426447629928589, + "eval_rewards/margins": 0.15929388999938965, + "eval_rewards/rejected": -0.8019387125968933, + "eval_runtime": 196.8787, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.079, + "step": 1080 + }, + { + "epoch": 0.14, + "learning_rate": 4.972488985422763e-06, + "logits/chosen": -2.787623882293701, + "logits/rejected": -2.7924771308898926, + "logps/chosen": -364.26190185546875, + "logps/rejected": -345.1329650878906, + "loss": 0.6063, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.5557677745819092, + "rewards/margins": 0.29480546712875366, + "rewards/rejected": -0.8505731821060181, + "step": 1090 + }, + { + "epoch": 0.14, + "eval_logits/chosen": -2.58791184425354, + "eval_logits/rejected": -2.5994956493377686, + "eval_logps/chosen": -401.9653015136719, + "eval_logps/rejected": -387.0663757324219, + "eval_loss": 0.6588745713233948, + "eval_rewards/accuracies": 0.6140000224113464, + "eval_rewards/chosen": -0.6921590566635132, + "eval_rewards/margins": 0.16740475594997406, + "eval_rewards/rejected": -0.8595638275146484, + "eval_runtime": 196.8539, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 1090 + }, + { + "epoch": 0.14, + "learning_rate": 4.970773311045514e-06, + "logits/chosen": -2.7719860076904297, + "logits/rejected": -2.7706387042999268, + "logps/chosen": -385.5480651855469, + "logps/rejected": -369.0314636230469, + "loss": 0.6684, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.6518079042434692, + "rewards/margins": 0.12277636677026749, + "rewards/rejected": -0.7745842337608337, + "step": 1100 + }, + { + "epoch": 0.14, + "eval_logits/chosen": -2.595028877258301, + "eval_logits/rejected": -2.6067097187042236, + "eval_logps/chosen": -397.0016784667969, + "eval_logps/rejected": -381.60601806640625, + "eval_loss": 0.6570342183113098, + "eval_rewards/accuracies": 0.6159999966621399, + "eval_rewards/chosen": -0.6425228714942932, + "eval_rewards/margins": 0.1624370813369751, + "eval_rewards/rejected": -0.8049599528312683, + "eval_runtime": 197.1886, + "eval_samples_per_second": 10.143, + "eval_steps_per_second": 5.071, + "step": 1100 + }, + { + "epoch": 0.15, + "learning_rate": 4.969006059183984e-06, + "logits/chosen": -2.790360689163208, + "logits/rejected": -2.7791943550109863, + "logps/chosen": -398.4950866699219, + "logps/rejected": -373.24981689453125, + "loss": 0.6948, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.618482768535614, + "rewards/margins": 0.09040616452693939, + "rewards/rejected": -0.7088888883590698, + "step": 1110 + }, + { + "epoch": 0.15, + "eval_logits/chosen": -2.6154470443725586, + "eval_logits/rejected": -2.6268742084503174, + "eval_logps/chosen": -386.2301940917969, + "eval_logps/rejected": -369.3778381347656, + "eval_loss": 0.6534083485603333, + "eval_rewards/accuracies": 0.6240000128746033, + "eval_rewards/chosen": -0.5348080396652222, + "eval_rewards/margins": 0.1478704810142517, + "eval_rewards/rejected": -0.6826784610748291, + "eval_runtime": 197.1538, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 1110 + }, + { + "epoch": 0.15, + "learning_rate": 4.967187266729623e-06, + "logits/chosen": -2.917677164077759, + "logits/rejected": -2.8968892097473145, + "logps/chosen": -393.16241455078125, + "logps/rejected": -371.6067199707031, + "loss": 0.683, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.5983080863952637, + "rewards/margins": 0.09545192122459412, + "rewards/rejected": -0.6937600374221802, + "step": 1120 + }, + { + "epoch": 0.15, + "eval_logits/chosen": -2.6260616779327393, + "eval_logits/rejected": -2.6370902061462402, + "eval_logps/chosen": -381.9963073730469, + "eval_logps/rejected": -364.393310546875, + "eval_loss": 0.652645468711853, + "eval_rewards/accuracies": 0.6255000233650208, + "eval_rewards/chosen": -0.49246877431869507, + "eval_rewards/margins": 0.1403646171092987, + "eval_rewards/rejected": -0.6328333616256714, + "eval_runtime": 196.8079, + "eval_samples_per_second": 10.162, + "eval_steps_per_second": 5.081, + "step": 1120 + }, + { + "epoch": 0.15, + "learning_rate": 4.965316971649791e-06, + "logits/chosen": -2.8983585834503174, + "logits/rejected": -2.887768030166626, + "logps/chosen": -404.21990966796875, + "logps/rejected": -374.83392333984375, + "loss": 0.5879, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.43791407346725464, + "rewards/margins": 0.28374427556991577, + "rewards/rejected": -0.7216584086418152, + "step": 1130 + }, + { + "epoch": 0.15, + "eval_logits/chosen": -2.6281559467315674, + "eval_logits/rejected": -2.639291763305664, + "eval_logps/chosen": -381.9638366699219, + "eval_logps/rejected": -364.2988586425781, + "eval_loss": 0.6523311138153076, + "eval_rewards/accuracies": 0.6209999918937683, + "eval_rewards/chosen": -0.4921444058418274, + "eval_rewards/margins": 0.13974425196647644, + "eval_rewards/rejected": -0.6318886280059814, + "eval_runtime": 196.9943, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.076, + "step": 1130 + }, + { + "epoch": 0.15, + "learning_rate": 4.963395212986964e-06, + "logits/chosen": -2.8828487396240234, + "logits/rejected": -2.862426280975342, + "logps/chosen": -347.30792236328125, + "logps/rejected": -316.6706237792969, + "loss": 0.6422, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.4925723075866699, + "rewards/margins": 0.1589849591255188, + "rewards/rejected": -0.6515573263168335, + "step": 1140 + }, + { + "epoch": 0.15, + "eval_logits/chosen": -2.628563404083252, + "eval_logits/rejected": -2.6401455402374268, + "eval_logps/chosen": -382.7380676269531, + "eval_logps/rejected": -365.3920593261719, + "eval_loss": 0.6521285176277161, + "eval_rewards/accuracies": 0.6234999895095825, + "eval_rewards/chosen": -0.49988648295402527, + "eval_rewards/margins": 0.1429338902235031, + "eval_rewards/rejected": -0.6428203582763672, + "eval_runtime": 196.8857, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 1140 + }, + { + "epoch": 0.15, + "learning_rate": 4.9614220308579285e-06, + "logits/chosen": -2.8444035053253174, + "logits/rejected": -2.877077341079712, + "logps/chosen": -386.1272888183594, + "logps/rejected": -391.4237976074219, + "loss": 0.6534, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.5051605105400085, + "rewards/margins": 0.12393464893102646, + "rewards/rejected": -0.629095196723938, + "step": 1150 + }, + { + "epoch": 0.15, + "eval_logits/chosen": -2.623694896697998, + "eval_logits/rejected": -2.6355655193328857, + "eval_logps/chosen": -384.07421875, + "eval_logps/rejected": -367.05096435546875, + "eval_loss": 0.6519166231155396, + "eval_rewards/accuracies": 0.6215000152587891, + "eval_rewards/chosen": -0.5132482051849365, + "eval_rewards/margins": 0.14616157114505768, + "eval_rewards/rejected": -0.6594097018241882, + "eval_runtime": 197.0766, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 1150 + }, + { + "epoch": 0.15, + "learning_rate": 4.9593974664529325e-06, + "logits/chosen": -2.8335769176483154, + "logits/rejected": -2.8060200214385986, + "logps/chosen": -384.097412109375, + "logps/rejected": -385.43145751953125, + "loss": 0.642, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.5143892168998718, + "rewards/margins": 0.1666194498538971, + "rewards/rejected": -0.6810086369514465, + "step": 1160 + }, + { + "epoch": 0.15, + "eval_logits/chosen": -2.618823528289795, + "eval_logits/rejected": -2.6312339305877686, + "eval_logps/chosen": -386.3024597167969, + "eval_logps/rejected": -369.7018737792969, + "eval_loss": 0.6519332528114319, + "eval_rewards/accuracies": 0.6240000128746033, + "eval_rewards/chosen": -0.535530686378479, + "eval_rewards/margins": 0.1503879874944687, + "eval_rewards/rejected": -0.6859186887741089, + "eval_runtime": 196.9, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 1160 + }, + { + "epoch": 0.15, + "learning_rate": 4.957321562034833e-06, + "logits/chosen": -2.9319796562194824, + "logits/rejected": -2.925686836242676, + "logps/chosen": -401.462890625, + "logps/rejected": -396.30706787109375, + "loss": 0.6138, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.5492364168167114, + "rewards/margins": 0.2358781397342682, + "rewards/rejected": -0.7851146459579468, + "step": 1170 + }, + { + "epoch": 0.15, + "eval_logits/chosen": -2.6126275062561035, + "eval_logits/rejected": -2.625770092010498, + "eval_logps/chosen": -390.0626220703125, + "eval_logps/rejected": -374.27813720703125, + "eval_loss": 0.6525918245315552, + "eval_rewards/accuracies": 0.6230000257492065, + "eval_rewards/chosen": -0.5731325745582581, + "eval_rewards/margins": 0.15854857861995697, + "eval_rewards/rejected": -0.7316811680793762, + "eval_runtime": 196.7922, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 5.082, + "step": 1170 + }, + { + "epoch": 0.15, + "learning_rate": 4.955194360938214e-06, + "logits/chosen": -2.9208590984344482, + "logits/rejected": -2.9480223655700684, + "logps/chosen": -372.567626953125, + "logps/rejected": -351.185791015625, + "loss": 0.6603, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.5812569260597229, + "rewards/margins": 0.12481508404016495, + "rewards/rejected": -0.7060720324516296, + "step": 1180 + }, + { + "epoch": 0.15, + "eval_logits/chosen": -2.609142780303955, + "eval_logits/rejected": -2.6229264736175537, + "eval_logps/chosen": -395.072021484375, + "eval_logps/rejected": -380.0800476074219, + "eval_loss": 0.6529130935668945, + "eval_rewards/accuracies": 0.6184999942779541, + "eval_rewards/chosen": -0.6232264041900635, + "eval_rewards/margins": 0.16647417843341827, + "eval_rewards/rejected": -0.7897005081176758, + "eval_runtime": 197.0058, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 1180 + }, + { + "epoch": 0.16, + "learning_rate": 4.9530159075684735e-06, + "logits/chosen": -2.8826727867126465, + "logits/rejected": -2.865142345428467, + "logps/chosen": -355.7762145996094, + "logps/rejected": -448.8487854003906, + "loss": 0.6446, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.6446607112884521, + "rewards/margins": 0.2116236686706543, + "rewards/rejected": -0.8562844395637512, + "step": 1190 + }, + { + "epoch": 0.16, + "eval_logits/chosen": -2.6025123596191406, + "eval_logits/rejected": -2.6169042587280273, + "eval_logps/chosen": -397.5279235839844, + "eval_logps/rejected": -382.97857666015625, + "eval_loss": 0.6540065407752991, + "eval_rewards/accuracies": 0.6159999966621399, + "eval_rewards/chosen": -0.6477850675582886, + "eval_rewards/margins": 0.17090027034282684, + "eval_rewards/rejected": -0.8186854124069214, + "eval_runtime": 197.0343, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 1190 + }, + { + "epoch": 0.16, + "learning_rate": 4.950786247400908e-06, + "logits/chosen": -2.848290205001831, + "logits/rejected": -2.8513758182525635, + "logps/chosen": -365.59149169921875, + "logps/rejected": -357.754150390625, + "loss": 0.6647, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.6885515451431274, + "rewards/margins": 0.1252683699131012, + "rewards/rejected": -0.8138198852539062, + "step": 1200 + }, + { + "epoch": 0.16, + "eval_logits/chosen": -2.599571466445923, + "eval_logits/rejected": -2.6143128871917725, + "eval_logps/chosen": -398.3748474121094, + "eval_logps/rejected": -383.98876953125, + "eval_loss": 0.6546086668968201, + "eval_rewards/accuracies": 0.6184999942779541, + "eval_rewards/chosen": -0.6562545299530029, + "eval_rewards/margins": 0.17253316938877106, + "eval_rewards/rejected": -0.8287877440452576, + "eval_runtime": 197.0359, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 1200 + }, + { + "epoch": 0.16, + "learning_rate": 4.948505426979756e-06, + "logits/chosen": -2.82503342628479, + "logits/rejected": -2.8127999305725098, + "logps/chosen": -384.06732177734375, + "logps/rejected": -385.8426513671875, + "loss": 0.6214, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.6643961071968079, + "rewards/margins": 0.2610850930213928, + "rewards/rejected": -0.9254812002182007, + "step": 1210 + }, + { + "epoch": 0.16, + "eval_logits/chosen": -2.6038050651550293, + "eval_logits/rejected": -2.618673801422119, + "eval_logps/chosen": -400.09637451171875, + "eval_logps/rejected": -385.89654541015625, + "eval_loss": 0.653429388999939, + "eval_rewards/accuracies": 0.6225000023841858, + "eval_rewards/chosen": -0.6734698414802551, + "eval_rewards/margins": 0.17439521849155426, + "eval_rewards/rejected": -0.8478650450706482, + "eval_runtime": 196.8932, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 1210 + }, + { + "epoch": 0.16, + "learning_rate": 4.946173493917228e-06, + "logits/chosen": -2.826169490814209, + "logits/rejected": -2.832860231399536, + "logps/chosen": -395.909423828125, + "logps/rejected": -354.52105712890625, + "loss": 0.7606, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.7976791262626648, + "rewards/margins": -0.05043324828147888, + "rewards/rejected": -0.7472458481788635, + "step": 1220 + }, + { + "epoch": 0.16, + "eval_logits/chosen": -2.6216962337493896, + "eval_logits/rejected": -2.635549545288086, + "eval_logps/chosen": -393.2814636230469, + "eval_logps/rejected": -377.7646484375, + "eval_loss": 0.650115430355072, + "eval_rewards/accuracies": 0.6234999895095825, + "eval_rewards/chosen": -0.6053206324577332, + "eval_rewards/margins": 0.16122600436210632, + "eval_rewards/rejected": -0.7665466070175171, + "eval_runtime": 197.0589, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 1220 + }, + { + "epoch": 0.16, + "learning_rate": 4.943790496892513e-06, + "logits/chosen": -2.900090456008911, + "logits/rejected": -2.9031574726104736, + "logps/chosen": -381.09210205078125, + "logps/rejected": -344.6459655761719, + "loss": 0.64, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.5675815939903259, + "rewards/margins": 0.18301823735237122, + "rewards/rejected": -0.7505998015403748, + "step": 1230 + }, + { + "epoch": 0.16, + "eval_logits/chosen": -2.635103464126587, + "eval_logits/rejected": -2.648451566696167, + "eval_logps/chosen": -388.4599304199219, + "eval_logps/rejected": -372.07000732421875, + "eval_loss": 0.648918867111206, + "eval_rewards/accuracies": 0.6234999895095825, + "eval_rewards/chosen": -0.5571054816246033, + "eval_rewards/margins": 0.15249404311180115, + "eval_rewards/rejected": -0.709599494934082, + "eval_runtime": 196.9092, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.078, + "step": 1230 + }, + { + "epoch": 0.16, + "learning_rate": 4.941356485650762e-06, + "logits/chosen": -2.9525580406188965, + "logits/rejected": -2.941685676574707, + "logps/chosen": -429.52752685546875, + "logps/rejected": -408.3736877441406, + "loss": 0.6503, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.5319823026657104, + "rewards/margins": 0.152207612991333, + "rewards/rejected": -0.6841899752616882, + "step": 1240 + }, + { + "epoch": 0.16, + "eval_logits/chosen": -2.641964912414551, + "eval_logits/rejected": -2.6544265747070312, + "eval_logps/chosen": -383.81805419921875, + "eval_logps/rejected": -366.7745666503906, + "eval_loss": 0.6485514044761658, + "eval_rewards/accuracies": 0.628000020980835, + "eval_rewards/chosen": -0.5106862187385559, + "eval_rewards/margins": 0.14595915377140045, + "eval_rewards/rejected": -0.6566452980041504, + "eval_runtime": 197.009, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 1240 + }, + { + "epoch": 0.16, + "learning_rate": 4.93887151100205e-06, + "logits/chosen": -2.8823115825653076, + "logits/rejected": -2.9025607109069824, + "logps/chosen": -431.13311767578125, + "logps/rejected": -402.4587707519531, + "loss": 0.6625, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.45433109998703003, + "rewards/margins": 0.09613112360239029, + "rewards/rejected": -0.5504623055458069, + "step": 1250 + }, + { + "epoch": 0.16, + "eval_logits/chosen": -2.650139570236206, + "eval_logits/rejected": -2.6617093086242676, + "eval_logps/chosen": -379.8656311035156, + "eval_logps/rejected": -362.19818115234375, + "eval_loss": 0.6486051082611084, + "eval_rewards/accuracies": 0.6290000081062317, + "eval_rewards/chosen": -0.47116225957870483, + "eval_rewards/margins": 0.139719620347023, + "eval_rewards/rejected": -0.6108819246292114, + "eval_runtime": 197.0634, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 1250 + }, + { + "epoch": 0.16, + "learning_rate": 4.936335624820313e-06, + "logits/chosen": -2.9498510360717773, + "logits/rejected": -2.936628818511963, + "logps/chosen": -369.2878112792969, + "logps/rejected": -331.2504577636719, + "loss": 0.6365, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.41221198439598083, + "rewards/margins": 0.15033474564552307, + "rewards/rejected": -0.5625467896461487, + "step": 1260 + }, + { + "epoch": 0.16, + "eval_logits/chosen": -2.648029088973999, + "eval_logits/rejected": -2.659898281097412, + "eval_logps/chosen": -379.5408630371094, + "eval_logps/rejected": -361.9754638671875, + "eval_loss": 0.6484161615371704, + "eval_rewards/accuracies": 0.6299999952316284, + "eval_rewards/chosen": -0.4679144322872162, + "eval_rewards/margins": 0.1407402604818344, + "eval_rewards/rejected": -0.6086547374725342, + "eval_runtime": 196.9078, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 1260 + }, + { + "epoch": 0.17, + "learning_rate": 4.933748880042271e-06, + "logits/chosen": -2.9828124046325684, + "logits/rejected": -2.9394354820251465, + "logps/chosen": -375.38494873046875, + "logps/rejected": -345.7095642089844, + "loss": 0.6314, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.43731123208999634, + "rewards/margins": 0.17238859832286835, + "rewards/rejected": -0.6096998453140259, + "step": 1270 + }, + { + "epoch": 0.17, + "eval_logits/chosen": -2.6389575004577637, + "eval_logits/rejected": -2.6514506340026855, + "eval_logps/chosen": -384.9039001464844, + "eval_logps/rejected": -368.42083740234375, + "eval_loss": 0.6474685072898865, + "eval_rewards/accuracies": 0.6274999976158142, + "eval_rewards/chosen": -0.5215447545051575, + "eval_rewards/margins": 0.1515636146068573, + "eval_rewards/rejected": -0.6731082797050476, + "eval_runtime": 196.8359, + "eval_samples_per_second": 10.161, + "eval_steps_per_second": 5.08, + "step": 1270 + }, + { + "epoch": 0.17, + "learning_rate": 4.931111330666317e-06, + "logits/chosen": -2.8784518241882324, + "logits/rejected": -2.8599307537078857, + "logps/chosen": -365.7098693847656, + "logps/rejected": -329.1894836425781, + "loss": 0.6429, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.5193601846694946, + "rewards/margins": 0.14618203043937683, + "rewards/rejected": -0.6655422449111938, + "step": 1280 + }, + { + "epoch": 0.17, + "eval_logits/chosen": -2.629905939102173, + "eval_logits/rejected": -2.643465042114258, + "eval_logps/chosen": -391.0508728027344, + "eval_logps/rejected": -375.53936767578125, + "eval_loss": 0.6468499898910522, + "eval_rewards/accuracies": 0.6265000104904175, + "eval_rewards/chosen": -0.5830146670341492, + "eval_rewards/margins": 0.1612788736820221, + "eval_rewards/rejected": -0.7442935109138489, + "eval_runtime": 197.1153, + "eval_samples_per_second": 10.146, + "eval_steps_per_second": 5.073, + "step": 1280 + }, + { + "epoch": 0.17, + "learning_rate": 4.9284230317513906e-06, + "logits/chosen": -2.9220080375671387, + "logits/rejected": -2.8997421264648438, + "logps/chosen": -420.4480895996094, + "logps/rejected": -379.02520751953125, + "loss": 0.6351, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.5989479422569275, + "rewards/margins": 0.19692249596118927, + "rewards/rejected": -0.7958704829216003, + "step": 1290 + }, + { + "epoch": 0.17, + "eval_logits/chosen": -2.6292710304260254, + "eval_logits/rejected": -2.643364667892456, + "eval_logps/chosen": -395.0453186035156, + "eval_logps/rejected": -380.0833740234375, + "eval_loss": 0.6463254690170288, + "eval_rewards/accuracies": 0.6255000233650208, + "eval_rewards/chosen": -0.6229589581489563, + "eval_rewards/margins": 0.16677448153495789, + "eval_rewards/rejected": -0.7897334694862366, + "eval_runtime": 196.93, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 1290 + }, + { + "epoch": 0.17, + "learning_rate": 4.9256840394158325e-06, + "logits/chosen": -2.8061976432800293, + "logits/rejected": -2.8045198917388916, + "logps/chosen": -407.2315368652344, + "logps/rejected": -452.9224548339844, + "loss": 0.6147, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.6434057950973511, + "rewards/margins": 0.2461806833744049, + "rewards/rejected": -0.8895864486694336, + "step": 1300 + }, + { + "epoch": 0.17, + "eval_logits/chosen": -2.6303420066833496, + "eval_logits/rejected": -2.6449639797210693, + "eval_logps/chosen": -402.0264892578125, + "eval_logps/rejected": -387.9538269042969, + "eval_loss": 0.6466883420944214, + "eval_rewards/accuracies": 0.621999979019165, + "eval_rewards/chosen": -0.6927708387374878, + "eval_rewards/margins": 0.17566701769828796, + "eval_rewards/rejected": -0.8684378862380981, + "eval_runtime": 197.2101, + "eval_samples_per_second": 10.141, + "eval_steps_per_second": 5.071, + "step": 1300 + }, + { + "epoch": 0.17, + "learning_rate": 4.922894410836207e-06, + "logits/chosen": -2.8735668659210205, + "logits/rejected": -2.8372910022735596, + "logps/chosen": -431.63714599609375, + "logps/rejected": -371.83966064453125, + "loss": 0.6809, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.7498286962509155, + "rewards/margins": 0.13587155938148499, + "rewards/rejected": -0.8857002258300781, + "step": 1310 + }, + { + "epoch": 0.17, + "eval_logits/chosen": -2.6348013877868652, + "eval_logits/rejected": -2.648871421813965, + "eval_logps/chosen": -406.22467041015625, + "eval_logps/rejected": -392.4579162597656, + "eval_loss": 0.6477887034416199, + "eval_rewards/accuracies": 0.6215000152587891, + "eval_rewards/chosen": -0.7347524166107178, + "eval_rewards/margins": 0.17872664332389832, + "eval_rewards/rejected": -0.9134791493415833, + "eval_runtime": 196.9305, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 1310 + }, + { + "epoch": 0.17, + "learning_rate": 4.920054204246116e-06, + "logits/chosen": -2.89911150932312, + "logits/rejected": -2.8787920475006104, + "logps/chosen": -411.595947265625, + "logps/rejected": -365.5648498535156, + "loss": 0.6469, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.6206297874450684, + "rewards/margins": 0.1485154777765274, + "rewards/rejected": -0.7691451907157898, + "step": 1320 + }, + { + "epoch": 0.17, + "eval_logits/chosen": -2.6406266689300537, + "eval_logits/rejected": -2.6541240215301514, + "eval_logps/chosen": -400.6297912597656, + "eval_logps/rejected": -386.1690979003906, + "eval_loss": 0.6465025544166565, + "eval_rewards/accuracies": 0.6200000047683716, + "eval_rewards/chosen": -0.6788040399551392, + "eval_rewards/margins": 0.17178669571876526, + "eval_rewards/rejected": -0.8505907654762268, + "eval_runtime": 197.1066, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.073, + "step": 1320 + }, + { + "epoch": 0.17, + "learning_rate": 4.9171634789349744e-06, + "logits/chosen": -2.873453140258789, + "logits/rejected": -2.8838162422180176, + "logps/chosen": -391.2914123535156, + "logps/rejected": -407.5999755859375, + "loss": 0.5969, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.6366595029830933, + "rewards/margins": 0.26933524012565613, + "rewards/rejected": -0.9059947729110718, + "step": 1330 + }, + { + "epoch": 0.17, + "eval_logits/chosen": -2.634784460067749, + "eval_logits/rejected": -2.6493115425109863, + "eval_logps/chosen": -399.88433837890625, + "eval_logps/rejected": -385.7465515136719, + "eval_loss": 0.6460168361663818, + "eval_rewards/accuracies": 0.6215000152587891, + "eval_rewards/chosen": -0.6713496446609497, + "eval_rewards/margins": 0.17501556873321533, + "eval_rewards/rejected": -0.846365213394165, + "eval_runtime": 197.0915, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 1330 + }, + { + "epoch": 0.18, + "learning_rate": 4.914222295246782e-06, + "logits/chosen": -2.8562376499176025, + "logits/rejected": -2.856698513031006, + "logps/chosen": -392.6881408691406, + "logps/rejected": -384.72723388671875, + "loss": 0.6755, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.6853694915771484, + "rewards/margins": 0.09361520409584045, + "rewards/rejected": -0.7789847254753113, + "step": 1340 + }, + { + "epoch": 0.18, + "eval_logits/chosen": -2.6282548904418945, + "eval_logits/rejected": -2.6436192989349365, + "eval_logps/chosen": -400.68450927734375, + "eval_logps/rejected": -386.9583740234375, + "eval_loss": 0.6460389494895935, + "eval_rewards/accuracies": 0.6209999918937683, + "eval_rewards/chosen": -0.6793510317802429, + "eval_rewards/margins": 0.1791324019432068, + "eval_rewards/rejected": -0.8584833741188049, + "eval_runtime": 197.0616, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 1340 + }, + { + "epoch": 0.18, + "learning_rate": 4.911230714578858e-06, + "logits/chosen": -2.837684154510498, + "logits/rejected": -2.8746697902679443, + "logps/chosen": -336.82830810546875, + "logps/rejected": -383.8268127441406, + "loss": 0.6043, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.6028086543083191, + "rewards/margins": 0.26204943656921387, + "rewards/rejected": -0.8648580312728882, + "step": 1350 + }, + { + "epoch": 0.18, + "eval_logits/chosen": -2.619495391845703, + "eval_logits/rejected": -2.6352591514587402, + "eval_logps/chosen": -400.1864013671875, + "eval_logps/rejected": -386.6622009277344, + "eval_loss": 0.6463934183120728, + "eval_rewards/accuracies": 0.6265000104904175, + "eval_rewards/chosen": -0.674369752407074, + "eval_rewards/margins": 0.18115192651748657, + "eval_rewards/rejected": -0.8555216789245605, + "eval_runtime": 196.9235, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 1350 + }, + { + "epoch": 0.18, + "learning_rate": 4.908188799380558e-06, + "logits/chosen": -2.8478968143463135, + "logits/rejected": -2.8693909645080566, + "logps/chosen": -372.5030517578125, + "logps/rejected": -350.12347412109375, + "loss": 0.6213, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.6362836360931396, + "rewards/margins": 0.21307387948036194, + "rewards/rejected": -0.8493574857711792, + "step": 1360 + }, + { + "epoch": 0.18, + "eval_logits/chosen": -2.6073155403137207, + "eval_logits/rejected": -2.623591899871826, + "eval_logps/chosen": -405.56060791015625, + "eval_logps/rejected": -392.87646484375, + "eval_loss": 0.6476759314537048, + "eval_rewards/accuracies": 0.6274999976158142, + "eval_rewards/chosen": -0.7281119227409363, + "eval_rewards/margins": 0.18955254554748535, + "eval_rewards/rejected": -0.9176644682884216, + "eval_runtime": 196.7678, + "eval_samples_per_second": 10.164, + "eval_steps_per_second": 5.082, + "step": 1360 + }, + { + "epoch": 0.18, + "learning_rate": 4.905096613151975e-06, + "logits/chosen": -2.7704315185546875, + "logits/rejected": -2.7330398559570312, + "logps/chosen": -442.86669921875, + "logps/rejected": -425.8495178222656, + "loss": 0.6907, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": -0.786264181137085, + "rewards/margins": 0.07904358208179474, + "rewards/rejected": -0.8653076887130737, + "step": 1370 + }, + { + "epoch": 0.18, + "eval_logits/chosen": -2.6075170040130615, + "eval_logits/rejected": -2.623138904571533, + "eval_logps/chosen": -406.0066223144531, + "eval_logps/rejected": -393.4615478515625, + "eval_loss": 0.6468802690505981, + "eval_rewards/accuracies": 0.625, + "eval_rewards/chosen": -0.7325721979141235, + "eval_rewards/margins": 0.19094309210777283, + "eval_rewards/rejected": -0.923515260219574, + "eval_runtime": 196.7864, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 5.082, + "step": 1370 + }, + { + "epoch": 0.18, + "learning_rate": 4.90195422044261e-06, + "logits/chosen": -2.8514914512634277, + "logits/rejected": -2.8642072677612305, + "logps/chosen": -419.1561584472656, + "logps/rejected": -406.09771728515625, + "loss": 0.6027, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.6098551750183105, + "rewards/margins": 0.275759756565094, + "rewards/rejected": -0.8856149911880493, + "step": 1380 + }, + { + "epoch": 0.18, + "eval_logits/chosen": -2.6010429859161377, + "eval_logits/rejected": -2.616837739944458, + "eval_logps/chosen": -407.6045227050781, + "eval_logps/rejected": -395.3316650390625, + "eval_loss": 0.6467740535736084, + "eval_rewards/accuracies": 0.6215000152587891, + "eval_rewards/chosen": -0.7485515475273132, + "eval_rewards/margins": 0.19366492331027985, + "eval_rewards/rejected": -0.9422163963317871, + "eval_runtime": 197.4331, + "eval_samples_per_second": 10.13, + "eval_steps_per_second": 5.065, + "step": 1380 + }, + { + "epoch": 0.18, + "learning_rate": 4.898761686850028e-06, + "logits/chosen": -2.7812657356262207, + "logits/rejected": -2.746971368789673, + "logps/chosen": -409.679443359375, + "logps/rejected": -418.7041015625, + "loss": 0.6505, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.8384740948677063, + "rewards/margins": 0.23909902572631836, + "rewards/rejected": -1.0775730609893799, + "step": 1390 + }, + { + "epoch": 0.18, + "eval_logits/chosen": -2.6073296070098877, + "eval_logits/rejected": -2.6223056316375732, + "eval_logps/chosen": -404.7939758300781, + "eval_logps/rejected": -392.07623291015625, + "eval_loss": 0.6456750631332397, + "eval_rewards/accuracies": 0.625, + "eval_rewards/chosen": -0.7204453349113464, + "eval_rewards/margins": 0.18921701610088348, + "eval_rewards/rejected": -0.9096623659133911, + "eval_runtime": 196.8403, + "eval_samples_per_second": 10.161, + "eval_steps_per_second": 5.08, + "step": 1390 + }, + { + "epoch": 0.18, + "learning_rate": 4.895519079018485e-06, + "logits/chosen": -2.752323627471924, + "logits/rejected": -2.7307071685791016, + "logps/chosen": -385.1970520019531, + "logps/rejected": -366.06488037109375, + "loss": 0.6131, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.6806402206420898, + "rewards/margins": 0.29367339611053467, + "rewards/rejected": -0.9743136167526245, + "step": 1400 + }, + { + "epoch": 0.18, + "eval_logits/chosen": -2.611858367919922, + "eval_logits/rejected": -2.6268720626831055, + "eval_logps/chosen": -402.51055908203125, + "eval_logps/rejected": -389.5133056640625, + "eval_loss": 0.6447837948799133, + "eval_rewards/accuracies": 0.6265000104904175, + "eval_rewards/chosen": -0.6976117491722107, + "eval_rewards/margins": 0.18642136454582214, + "eval_rewards/rejected": -0.8840330839157104, + "eval_runtime": 196.9091, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.078, + "step": 1400 + }, + { + "epoch": 0.18, + "learning_rate": 4.89222646463754e-06, + "logits/chosen": -2.8868727684020996, + "logits/rejected": -2.8568384647369385, + "logps/chosen": -393.14556884765625, + "logps/rejected": -396.2594299316406, + "loss": 0.6537, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.7115556001663208, + "rewards/margins": 0.20605134963989258, + "rewards/rejected": -0.9176069498062134, + "step": 1410 + }, + { + "epoch": 0.18, + "eval_logits/chosen": -2.614816665649414, + "eval_logits/rejected": -2.629605770111084, + "eval_logps/chosen": -400.4852600097656, + "eval_logps/rejected": -387.1134948730469, + "eval_loss": 0.6440988183021545, + "eval_rewards/accuracies": 0.6274999976158142, + "eval_rewards/chosen": -0.6773582100868225, + "eval_rewards/margins": 0.18267665803432465, + "eval_rewards/rejected": -0.8600347638130188, + "eval_runtime": 197.1086, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.073, + "step": 1410 + }, + { + "epoch": 0.19, + "learning_rate": 4.888883912440642e-06, + "logits/chosen": -2.8805582523345947, + "logits/rejected": -2.9014639854431152, + "logps/chosen": -458.44256591796875, + "logps/rejected": -453.98486328125, + "loss": 0.6384, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.7284379005432129, + "rewards/margins": 0.20983977615833282, + "rewards/rejected": -0.9382778406143188, + "step": 1420 + }, + { + "epoch": 0.19, + "eval_logits/chosen": -2.617636203765869, + "eval_logits/rejected": -2.6321375370025635, + "eval_logps/chosen": -399.8628845214844, + "eval_logps/rejected": -386.35150146484375, + "eval_loss": 0.6433753371238708, + "eval_rewards/accuracies": 0.628000020980835, + "eval_rewards/chosen": -0.6711348295211792, + "eval_rewards/margins": 0.1812804937362671, + "eval_rewards/rejected": -0.8524152636528015, + "eval_runtime": 196.9731, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 1420 + }, + { + "epoch": 0.19, + "learning_rate": 4.885491492203688e-06, + "logits/chosen": -2.8176732063293457, + "logits/rejected": -2.8348517417907715, + "logps/chosen": -400.16973876953125, + "logps/rejected": -385.3367614746094, + "loss": 0.6132, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.603407084941864, + "rewards/margins": 0.2256653755903244, + "rewards/rejected": -0.8290724754333496, + "step": 1430 + }, + { + "epoch": 0.19, + "eval_logits/chosen": -2.6181766986846924, + "eval_logits/rejected": -2.632568359375, + "eval_logps/chosen": -403.1084289550781, + "eval_logps/rejected": -390.0040588378906, + "eval_loss": 0.6431609988212585, + "eval_rewards/accuracies": 0.6269999742507935, + "eval_rewards/chosen": -0.703589916229248, + "eval_rewards/margins": 0.18535077571868896, + "eval_rewards/rejected": -0.8889405727386475, + "eval_runtime": 196.9319, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 1430 + }, + { + "epoch": 0.19, + "learning_rate": 4.882049274743578e-06, + "logits/chosen": -2.9042248725891113, + "logits/rejected": -2.891632556915283, + "logps/chosen": -448.39520263671875, + "logps/rejected": -420.09332275390625, + "loss": 0.6443, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.664592981338501, + "rewards/margins": 0.17925769090652466, + "rewards/rejected": -0.8438507318496704, + "step": 1440 + }, + { + "epoch": 0.19, + "eval_logits/chosen": -2.6198770999908447, + "eval_logits/rejected": -2.6345512866973877, + "eval_logps/chosen": -403.4805908203125, + "eval_logps/rejected": -390.4880676269531, + "eval_loss": 0.6423071622848511, + "eval_rewards/accuracies": 0.6265000104904175, + "eval_rewards/chosen": -0.7073121070861816, + "eval_rewards/margins": 0.18646840751171112, + "eval_rewards/rejected": -0.893780529499054, + "eval_runtime": 196.9843, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 1440 + }, + { + "epoch": 0.19, + "learning_rate": 4.878557331916729e-06, + "logits/chosen": -2.8701610565185547, + "logits/rejected": -2.8831980228424072, + "logps/chosen": -390.12823486328125, + "logps/rejected": -377.2284240722656, + "loss": 0.615, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.6979155540466309, + "rewards/margins": 0.23393838107585907, + "rewards/rejected": -0.9318540692329407, + "step": 1450 + }, + { + "epoch": 0.19, + "eval_logits/chosen": -2.6227636337280273, + "eval_logits/rejected": -2.6376028060913086, + "eval_logps/chosen": -401.7284851074219, + "eval_logps/rejected": -388.5261535644531, + "eval_loss": 0.6419389843940735, + "eval_rewards/accuracies": 0.6269999742507935, + "eval_rewards/chosen": -0.689790666103363, + "eval_rewards/margins": 0.18437045812606812, + "eval_rewards/rejected": -0.8741611242294312, + "eval_runtime": 196.9963, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 1450 + }, + { + "epoch": 0.19, + "learning_rate": 4.875015736617576e-06, + "logits/chosen": -2.7935924530029297, + "logits/rejected": -2.766704559326172, + "logps/chosen": -483.1861877441406, + "logps/rejected": -444.1046447753906, + "loss": 0.6368, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.717400074005127, + "rewards/margins": 0.21793465316295624, + "rewards/rejected": -0.9353348016738892, + "step": 1460 + }, + { + "epoch": 0.19, + "eval_logits/chosen": -2.612457036972046, + "eval_logits/rejected": -2.628230333328247, + "eval_logps/chosen": -404.81500244140625, + "eval_logps/rejected": -392.3670654296875, + "eval_loss": 0.6420219540596008, + "eval_rewards/accuracies": 0.6365000009536743, + "eval_rewards/chosen": -0.7206559777259827, + "eval_rewards/margins": 0.19191448390483856, + "eval_rewards/rejected": -0.9125705361366272, + "eval_runtime": 196.8079, + "eval_samples_per_second": 10.162, + "eval_steps_per_second": 5.081, + "step": 1460 + }, + { + "epoch": 0.19, + "learning_rate": 4.8714245627770515e-06, + "logits/chosen": -2.8471336364746094, + "logits/rejected": -2.8089940547943115, + "logps/chosen": -383.68597412109375, + "logps/rejected": -341.060546875, + "loss": 0.6896, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.7236995697021484, + "rewards/margins": 0.07411099970340729, + "rewards/rejected": -0.7978106141090393, + "step": 1470 + }, + { + "epoch": 0.19, + "eval_logits/chosen": -2.614165782928467, + "eval_logits/rejected": -2.629824638366699, + "eval_logps/chosen": -401.9931945800781, + "eval_logps/rejected": -389.17608642578125, + "eval_loss": 0.6410108804702759, + "eval_rewards/accuracies": 0.6320000290870667, + "eval_rewards/chosen": -0.6924377083778381, + "eval_rewards/margins": 0.18822318315505981, + "eval_rewards/rejected": -0.8806608319282532, + "eval_runtime": 196.7423, + "eval_samples_per_second": 10.166, + "eval_steps_per_second": 5.083, + "step": 1470 + }, + { + "epoch": 0.19, + "learning_rate": 4.8677838853610445e-06, + "logits/chosen": -2.7825706005096436, + "logits/rejected": -2.798952341079712, + "logps/chosen": -395.2989501953125, + "logps/rejected": -353.34814453125, + "loss": 0.6412, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.6694716215133667, + "rewards/margins": 0.1773361712694168, + "rewards/rejected": -0.8468077778816223, + "step": 1480 + }, + { + "epoch": 0.19, + "eval_logits/chosen": -2.6154189109802246, + "eval_logits/rejected": -2.630645990371704, + "eval_logps/chosen": -406.365478515625, + "eval_logps/rejected": -393.8523864746094, + "eval_loss": 0.6411867737770081, + "eval_rewards/accuracies": 0.6294999718666077, + "eval_rewards/chosen": -0.736160397529602, + "eval_rewards/margins": 0.19126297533512115, + "eval_rewards/rejected": -0.927423357963562, + "eval_runtime": 196.9114, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.078, + "step": 1480 + }, + { + "epoch": 0.19, + "learning_rate": 4.864093780368828e-06, + "logits/chosen": -2.8738656044006348, + "logits/rejected": -2.8321421146392822, + "logps/chosen": -440.78955078125, + "logps/rejected": -383.57940673828125, + "loss": 0.6064, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.676841139793396, + "rewards/margins": 0.2552284896373749, + "rewards/rejected": -0.9320695996284485, + "step": 1490 + }, + { + "epoch": 0.19, + "eval_logits/chosen": -2.6182310581207275, + "eval_logits/rejected": -2.6331787109375, + "eval_logps/chosen": -409.4355773925781, + "eval_logps/rejected": -397.0263366699219, + "eval_loss": 0.6417971849441528, + "eval_rewards/accuracies": 0.6244999766349792, + "eval_rewards/chosen": -0.7668612003326416, + "eval_rewards/margins": 0.192301943898201, + "eval_rewards/rejected": -0.9591631293296814, + "eval_runtime": 196.9899, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.076, + "step": 1490 + }, + { + "epoch": 0.2, + "learning_rate": 4.860354324831482e-06, + "logits/chosen": -2.844330072402954, + "logits/rejected": -2.829576015472412, + "logps/chosen": -404.71185302734375, + "logps/rejected": -419.93682861328125, + "loss": 0.6325, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.7922731637954712, + "rewards/margins": 0.1927259862422943, + "rewards/rejected": -0.9849990010261536, + "step": 1500 + }, + { + "epoch": 0.2, + "eval_logits/chosen": -2.621196985244751, + "eval_logits/rejected": -2.6360583305358887, + "eval_logps/chosen": -409.8640441894531, + "eval_logps/rejected": -397.48095703125, + "eval_loss": 0.6417466402053833, + "eval_rewards/accuracies": 0.6284999847412109, + "eval_rewards/chosen": -0.7711459994316101, + "eval_rewards/margins": 0.19256363809108734, + "eval_rewards/rejected": -0.9637096524238586, + "eval_runtime": 197.4747, + "eval_samples_per_second": 10.128, + "eval_steps_per_second": 5.064, + "step": 1500 + }, + { + "epoch": 0.2, + "learning_rate": 4.856565596810279e-06, + "logits/chosen": -2.851569652557373, + "logits/rejected": -2.8237807750701904, + "logps/chosen": -342.65606689453125, + "logps/rejected": -379.66656494140625, + "loss": 0.6424, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.7834426164627075, + "rewards/margins": 0.1825077384710312, + "rewards/rejected": -0.9659503698348999, + "step": 1510 + }, + { + "epoch": 0.2, + "eval_logits/chosen": -2.6154792308807373, + "eval_logits/rejected": -2.6306729316711426, + "eval_logps/chosen": -406.4837646484375, + "eval_logps/rejected": -394.25555419921875, + "eval_loss": 0.6402400135993958, + "eval_rewards/accuracies": 0.6320000290870667, + "eval_rewards/chosen": -0.7373436093330383, + "eval_rewards/margins": 0.19411173462867737, + "eval_rewards/rejected": -0.9314553141593933, + "eval_runtime": 197.2548, + "eval_samples_per_second": 10.139, + "eval_steps_per_second": 5.07, + "step": 1510 + }, + { + "epoch": 0.2, + "learning_rate": 4.852727675395056e-06, + "logits/chosen": -2.8235487937927246, + "logits/rejected": -2.819708824157715, + "logps/chosen": -392.69329833984375, + "logps/rejected": -371.3106384277344, + "loss": 0.5892, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.6893141269683838, + "rewards/margins": 0.29039710760116577, + "rewards/rejected": -0.9797111749649048, + "step": 1520 + }, + { + "epoch": 0.2, + "eval_logits/chosen": -2.603848695755005, + "eval_logits/rejected": -2.6197257041931152, + "eval_logps/chosen": -412.2532043457031, + "eval_logps/rejected": -401.0218505859375, + "eval_loss": 0.6410880088806152, + "eval_rewards/accuracies": 0.6355000138282776, + "eval_rewards/chosen": -0.7950379848480225, + "eval_rewards/margins": 0.20408010482788086, + "eval_rewards/rejected": -0.9991180300712585, + "eval_runtime": 197.1794, + "eval_samples_per_second": 10.143, + "eval_steps_per_second": 5.072, + "step": 1520 + }, + { + "epoch": 0.2, + "learning_rate": 4.848840640702565e-06, + "logits/chosen": -2.860694408416748, + "logits/rejected": -2.8731682300567627, + "logps/chosen": -388.0919189453125, + "logps/rejected": -359.65045166015625, + "loss": 0.7037, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.8457175493240356, + "rewards/margins": 0.07708420604467392, + "rewards/rejected": -0.9228017926216125, + "step": 1530 + }, + { + "epoch": 0.2, + "eval_logits/chosen": -2.6086678504943848, + "eval_logits/rejected": -2.6243414878845215, + "eval_logps/chosen": -410.0699768066406, + "eval_logps/rejected": -398.4937744140625, + "eval_loss": 0.6403050422668457, + "eval_rewards/accuracies": 0.6340000033378601, + "eval_rewards/chosen": -0.7732056975364685, + "eval_rewards/margins": 0.2006317377090454, + "eval_rewards/rejected": -0.9738374948501587, + "eval_runtime": 197.0745, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 1530 + }, + { + "epoch": 0.2, + "learning_rate": 4.844904573874798e-06, + "logits/chosen": -2.774444103240967, + "logits/rejected": -2.805631160736084, + "logps/chosen": -408.6401062011719, + "logps/rejected": -373.6168518066406, + "loss": 0.6159, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.6792012453079224, + "rewards/margins": 0.25484994053840637, + "rewards/rejected": -0.9340512156486511, + "step": 1540 + }, + { + "epoch": 0.2, + "eval_logits/chosen": -2.6074650287628174, + "eval_logits/rejected": -2.622997522354126, + "eval_logps/chosen": -405.1261901855469, + "eval_logps/rejected": -393.0093078613281, + "eval_loss": 0.6390379071235657, + "eval_rewards/accuracies": 0.6340000033378601, + "eval_rewards/chosen": -0.7237675786018372, + "eval_rewards/margins": 0.19522573053836823, + "eval_rewards/rejected": -0.9189932942390442, + "eval_runtime": 196.8614, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.08, + "step": 1540 + }, + { + "epoch": 0.2, + "learning_rate": 4.840919557077297e-06, + "logits/chosen": -2.831430435180664, + "logits/rejected": -2.780000686645508, + "logps/chosen": -406.05914306640625, + "logps/rejected": -365.9083557128906, + "loss": 0.6365, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.6498536467552185, + "rewards/margins": 0.18074217438697815, + "rewards/rejected": -0.8305959701538086, + "step": 1550 + }, + { + "epoch": 0.2, + "eval_logits/chosen": -2.605069398880005, + "eval_logits/rejected": -2.6206929683685303, + "eval_logps/chosen": -405.4744873046875, + "eval_logps/rejected": -393.349853515625, + "eval_loss": 0.6390611529350281, + "eval_rewards/accuracies": 0.6370000243186951, + "eval_rewards/chosen": -0.7272511720657349, + "eval_rewards/margins": 0.19514717161655426, + "eval_rewards/rejected": -0.9223982691764832, + "eval_runtime": 196.8733, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.079, + "step": 1550 + }, + { + "epoch": 0.2, + "learning_rate": 4.836885673497435e-06, + "logits/chosen": -2.8119847774505615, + "logits/rejected": -2.7871992588043213, + "logps/chosen": -415.7240295410156, + "logps/rejected": -404.88671875, + "loss": 0.6055, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.7152377963066101, + "rewards/margins": 0.26963186264038086, + "rewards/rejected": -0.9848695993423462, + "step": 1560 + }, + { + "epoch": 0.2, + "eval_logits/chosen": -2.594703435897827, + "eval_logits/rejected": -2.611009359359741, + "eval_logps/chosen": -406.0096130371094, + "eval_logps/rejected": -394.2928771972656, + "eval_loss": 0.6390554308891296, + "eval_rewards/accuracies": 0.6334999799728394, + "eval_rewards/chosen": -0.7326022982597351, + "eval_rewards/margins": 0.19922657310962677, + "eval_rewards/rejected": -0.9318288564682007, + "eval_runtime": 197.1571, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 1560 + }, + { + "epoch": 0.21, + "learning_rate": 4.832803007342679e-06, + "logits/chosen": -2.81030011177063, + "logits/rejected": -2.7911148071289062, + "logps/chosen": -373.5116271972656, + "logps/rejected": -403.6263122558594, + "loss": 0.6129, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.7071263194084167, + "rewards/margins": 0.2654086947441101, + "rewards/rejected": -0.9725350141525269, + "step": 1570 + }, + { + "epoch": 0.21, + "eval_logits/chosen": -2.581125497817993, + "eval_logits/rejected": -2.598928928375244, + "eval_logps/chosen": -403.1785583496094, + "eval_logps/rejected": -391.4497375488281, + "eval_loss": 0.6401770114898682, + "eval_rewards/accuracies": 0.6359999775886536, + "eval_rewards/chosen": -0.7042912840843201, + "eval_rewards/margins": 0.19910559058189392, + "eval_rewards/rejected": -0.9033968448638916, + "eval_runtime": 197.0534, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 1570 + }, + { + "epoch": 0.21, + "learning_rate": 4.828671643838839e-06, + "logits/chosen": -2.712752103805542, + "logits/rejected": -2.713848829269409, + "logps/chosen": -387.67559814453125, + "logps/rejected": -354.3922119140625, + "loss": 0.6286, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.6284931302070618, + "rewards/margins": 0.22074835002422333, + "rewards/rejected": -0.8492414355278015, + "step": 1580 + }, + { + "epoch": 0.21, + "eval_logits/chosen": -2.5615954399108887, + "eval_logits/rejected": -2.5810999870300293, + "eval_logps/chosen": -411.4100036621094, + "eval_logps/rejected": -401.1250915527344, + "eval_loss": 0.6424925923347473, + "eval_rewards/accuracies": 0.6380000114440918, + "eval_rewards/chosen": -0.786605715751648, + "eval_rewards/margins": 0.2135448008775711, + "eval_rewards/rejected": -1.0001505613327026, + "eval_runtime": 197.1811, + "eval_samples_per_second": 10.143, + "eval_steps_per_second": 5.071, + "step": 1580 + }, + { + "epoch": 0.21, + "learning_rate": 4.824491669228279e-06, + "logits/chosen": -2.6709794998168945, + "logits/rejected": -2.7086164951324463, + "logps/chosen": -367.11236572265625, + "logps/rejected": -356.0587463378906, + "loss": 0.6803, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.7548516988754272, + "rewards/margins": 0.10954463481903076, + "rewards/rejected": -0.8643962740898132, + "step": 1590 + }, + { + "epoch": 0.21, + "eval_logits/chosen": -2.560438632965088, + "eval_logits/rejected": -2.580240249633789, + "eval_logps/chosen": -416.12518310546875, + "eval_logps/rejected": -406.64556884765625, + "eval_loss": 0.6424650549888611, + "eval_rewards/accuracies": 0.637499988079071, + "eval_rewards/chosen": -0.8337578177452087, + "eval_rewards/margins": 0.22159793972969055, + "eval_rewards/rejected": -1.0553555488586426, + "eval_runtime": 196.9846, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 1590 + }, + { + "epoch": 0.21, + "learning_rate": 4.8202631707681245e-06, + "logits/chosen": -2.7330760955810547, + "logits/rejected": -2.6766715049743652, + "logps/chosen": -382.569091796875, + "logps/rejected": -383.3133850097656, + "loss": 0.6037, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.8243730664253235, + "rewards/margins": 0.2924764156341553, + "rewards/rejected": -1.1168495416641235, + "step": 1600 + }, + { + "epoch": 0.21, + "eval_logits/chosen": -2.5603301525115967, + "eval_logits/rejected": -2.580162286758423, + "eval_logps/chosen": -415.4783630371094, + "eval_logps/rejected": -405.836181640625, + "eval_loss": 0.6421064734458923, + "eval_rewards/accuracies": 0.6399999856948853, + "eval_rewards/chosen": -0.8272896409034729, + "eval_rewards/margins": 0.21997201442718506, + "eval_rewards/rejected": -1.0472615957260132, + "eval_runtime": 197.0669, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.074, + "step": 1600 + }, + { + "epoch": 0.21, + "learning_rate": 4.815986236728437e-06, + "logits/chosen": -2.7097089290618896, + "logits/rejected": -2.7386956214904785, + "logps/chosen": -411.9666442871094, + "logps/rejected": -413.4825134277344, + "loss": 0.7021, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.9289957284927368, + "rewards/margins": 0.10253496468067169, + "rewards/rejected": -1.031530737876892, + "step": 1610 + }, + { + "epoch": 0.21, + "eval_logits/chosen": -2.5622901916503906, + "eval_logits/rejected": -2.5816233158111572, + "eval_logps/chosen": -403.5919494628906, + "eval_logps/rejected": -392.15234375, + "eval_loss": 0.642052173614502, + "eval_rewards/accuracies": 0.6340000033378601, + "eval_rewards/chosen": -0.7084251046180725, + "eval_rewards/margins": 0.20199787616729736, + "eval_rewards/rejected": -0.9104229807853699, + "eval_runtime": 196.9541, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 1610 + }, + { + "epoch": 0.21, + "learning_rate": 4.811660956390372e-06, + "logits/chosen": -2.777519702911377, + "logits/rejected": -2.7863945960998535, + "logps/chosen": -442.59698486328125, + "logps/rejected": -414.566650390625, + "loss": 0.6388, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.674359142780304, + "rewards/margins": 0.1822533905506134, + "rewards/rejected": -0.8566125631332397, + "step": 1620 + }, + { + "epoch": 0.21, + "eval_logits/chosen": -2.5668702125549316, + "eval_logits/rejected": -2.5854568481445312, + "eval_logps/chosen": -398.8391418457031, + "eval_logps/rejected": -386.6023254394531, + "eval_loss": 0.641032874584198, + "eval_rewards/accuracies": 0.6315000057220459, + "eval_rewards/chosen": -0.6608973145484924, + "eval_rewards/margins": 0.194025918841362, + "eval_rewards/rejected": -0.8549233078956604, + "eval_runtime": 197.3348, + "eval_samples_per_second": 10.135, + "eval_steps_per_second": 5.068, + "step": 1620 + }, + { + "epoch": 0.21, + "learning_rate": 4.807287420044319e-06, + "logits/chosen": -2.8177196979522705, + "logits/rejected": -2.841592311859131, + "logps/chosen": -351.3625183105469, + "logps/rejected": -361.21868896484375, + "loss": 0.5835, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.6561750173568726, + "rewards/margins": 0.33405548334121704, + "rewards/rejected": -0.9902304410934448, + "step": 1630 + }, + { + "epoch": 0.21, + "eval_logits/chosen": -2.559098958969116, + "eval_logits/rejected": -2.577807664871216, + "eval_logps/chosen": -405.84942626953125, + "eval_logps/rejected": -394.82330322265625, + "eval_loss": 0.6402274370193481, + "eval_rewards/accuracies": 0.6365000009536743, + "eval_rewards/chosen": -0.730999767780304, + "eval_rewards/margins": 0.2061331868171692, + "eval_rewards/rejected": -0.9371330738067627, + "eval_runtime": 197.2599, + "eval_samples_per_second": 10.139, + "eval_steps_per_second": 5.069, + "step": 1630 + }, + { + "epoch": 0.21, + "learning_rate": 4.802865718988008e-06, + "logits/chosen": -2.748746633529663, + "logits/rejected": -2.730214834213257, + "logps/chosen": -355.8330993652344, + "logps/rejected": -422.69281005859375, + "loss": 0.6083, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.7712303996086121, + "rewards/margins": 0.3000728189945221, + "rewards/rejected": -1.0713032484054565, + "step": 1640 + }, + { + "epoch": 0.21, + "eval_logits/chosen": -2.5498390197753906, + "eval_logits/rejected": -2.5689785480499268, + "eval_logps/chosen": -413.0655517578125, + "eval_logps/rejected": -403.24359130859375, + "eval_loss": 0.6408534646034241, + "eval_rewards/accuracies": 0.6340000033378601, + "eval_rewards/chosen": -0.8031615614891052, + "eval_rewards/margins": 0.21817424893379211, + "eval_rewards/rejected": -1.0213358402252197, + "eval_runtime": 197.1474, + "eval_samples_per_second": 10.145, + "eval_steps_per_second": 5.072, + "step": 1640 + }, + { + "epoch": 0.22, + "learning_rate": 4.798395945524615e-06, + "logits/chosen": -2.8017356395721436, + "logits/rejected": -2.8132927417755127, + "logps/chosen": -401.31146240234375, + "logps/rejected": -392.885986328125, + "loss": 0.6022, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.7307563424110413, + "rewards/margins": 0.2870885729789734, + "rewards/rejected": -1.0178449153900146, + "step": 1650 + }, + { + "epoch": 0.22, + "eval_logits/chosen": -2.5515244007110596, + "eval_logits/rejected": -2.5709784030914307, + "eval_logps/chosen": -419.0445861816406, + "eval_logps/rejected": -410.1859436035156, + "eval_loss": 0.6414780616760254, + "eval_rewards/accuracies": 0.6365000009536743, + "eval_rewards/chosen": -0.8629518151283264, + "eval_rewards/margins": 0.2278074324131012, + "eval_rewards/rejected": -1.09075927734375, + "eval_runtime": 197.2586, + "eval_samples_per_second": 10.139, + "eval_steps_per_second": 5.069, + "step": 1650 + }, + { + "epoch": 0.22, + "learning_rate": 4.793878192960823e-06, + "logits/chosen": -2.798947811126709, + "logits/rejected": -2.8003056049346924, + "logps/chosen": -469.7757873535156, + "logps/rejected": -475.6380920410156, + "loss": 0.6203, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.8466376066207886, + "rewards/margins": 0.2910873293876648, + "rewards/rejected": -1.1377251148223877, + "step": 1660 + }, + { + "epoch": 0.22, + "eval_logits/chosen": -2.5631649494171143, + "eval_logits/rejected": -2.5823311805725098, + "eval_logps/chosen": -417.77740478515625, + "eval_logps/rejected": -408.8164978027344, + "eval_loss": 0.640652596950531, + "eval_rewards/accuracies": 0.6370000243186951, + "eval_rewards/chosen": -0.8502798676490784, + "eval_rewards/margins": 0.22678521275520325, + "eval_rewards/rejected": -1.0770649909973145, + "eval_runtime": 197.2142, + "eval_samples_per_second": 10.141, + "eval_steps_per_second": 5.071, + "step": 1660 + }, + { + "epoch": 0.22, + "learning_rate": 4.789312555604887e-06, + "logits/chosen": -2.800078868865967, + "logits/rejected": -2.7635836601257324, + "logps/chosen": -386.3752136230469, + "logps/rejected": -376.82330322265625, + "loss": 0.6444, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.8182946443557739, + "rewards/margins": 0.21323814988136292, + "rewards/rejected": -1.0315327644348145, + "step": 1670 + }, + { + "epoch": 0.22, + "eval_logits/chosen": -2.573129415512085, + "eval_logits/rejected": -2.5919148921966553, + "eval_logps/chosen": -416.8188781738281, + "eval_logps/rejected": -407.67938232421875, + "eval_loss": 0.6396322250366211, + "eval_rewards/accuracies": 0.6355000138282776, + "eval_rewards/chosen": -0.8406945466995239, + "eval_rewards/margins": 0.22499865293502808, + "eval_rewards/rejected": -1.0656932592391968, + "eval_runtime": 197.0109, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 1670 + }, + { + "epoch": 0.22, + "learning_rate": 4.784699128764654e-06, + "logits/chosen": -2.8030784130096436, + "logits/rejected": -2.8131110668182373, + "logps/chosen": -383.93353271484375, + "logps/rejected": -387.96978759765625, + "loss": 0.6084, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.7460604906082153, + "rewards/margins": 0.316192626953125, + "rewards/rejected": -1.0622531175613403, + "step": 1680 + }, + { + "epoch": 0.22, + "eval_logits/chosen": -2.5807809829711914, + "eval_logits/rejected": -2.5989623069763184, + "eval_logps/chosen": -413.2848815917969, + "eval_logps/rejected": -403.7291564941406, + "eval_loss": 0.638802170753479, + "eval_rewards/accuracies": 0.6380000114440918, + "eval_rewards/chosen": -0.8053548336029053, + "eval_rewards/margins": 0.22083649039268494, + "eval_rewards/rejected": -1.026191234588623, + "eval_runtime": 196.9795, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 1680 + }, + { + "epoch": 0.22, + "learning_rate": 4.780038008745581e-06, + "logits/chosen": -2.791762351989746, + "logits/rejected": -2.80530047416687, + "logps/chosen": -440.07928466796875, + "logps/rejected": -404.647216796875, + "loss": 0.6797, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.8743046522140503, + "rewards/margins": 0.13973672688007355, + "rewards/rejected": -1.0140413045883179, + "step": 1690 + }, + { + "epoch": 0.22, + "eval_logits/chosen": -2.58063006401062, + "eval_logits/rejected": -2.5980546474456787, + "eval_logps/chosen": -415.2863464355469, + "eval_logps/rejected": -405.9435119628906, + "eval_loss": 0.6387109160423279, + "eval_rewards/accuracies": 0.640999972820282, + "eval_rewards/chosen": -0.825369119644165, + "eval_rewards/margins": 0.22296535968780518, + "eval_rewards/rejected": -1.0483345985412598, + "eval_runtime": 197.0549, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 1690 + }, + { + "epoch": 0.22, + "learning_rate": 4.775329292848721e-06, + "logits/chosen": -2.744279384613037, + "logits/rejected": -2.7326884269714355, + "logps/chosen": -434.35479736328125, + "logps/rejected": -432.7107849121094, + "loss": 0.6111, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.7824715375900269, + "rewards/margins": 0.2711241543292999, + "rewards/rejected": -1.053595781326294, + "step": 1700 + }, + { + "epoch": 0.22, + "eval_logits/chosen": -2.5749504566192627, + "eval_logits/rejected": -2.591935157775879, + "eval_logps/chosen": -414.1108703613281, + "eval_logps/rejected": -404.650634765625, + "eval_loss": 0.6384560465812683, + "eval_rewards/accuracies": 0.6389999985694885, + "eval_rewards/chosen": -0.8136144280433655, + "eval_rewards/margins": 0.22179150581359863, + "eval_rewards/rejected": -1.0354059934616089, + "eval_runtime": 197.0131, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 1700 + }, + { + "epoch": 0.22, + "learning_rate": 4.770573079368691e-06, + "logits/chosen": -2.7748918533325195, + "logits/rejected": -2.78712797164917, + "logps/chosen": -386.3089294433594, + "logps/rejected": -385.2343444824219, + "loss": 0.6356, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.738974392414093, + "rewards/margins": 0.2040799856185913, + "rewards/rejected": -0.9430543184280396, + "step": 1710 + }, + { + "epoch": 0.22, + "eval_logits/chosen": -2.5670783519744873, + "eval_logits/rejected": -2.583617925643921, + "eval_logps/chosen": -414.2315673828125, + "eval_logps/rejected": -404.7968444824219, + "eval_loss": 0.6393074989318848, + "eval_rewards/accuracies": 0.6340000033378601, + "eval_rewards/chosen": -0.8148214221000671, + "eval_rewards/margins": 0.22204671800136566, + "eval_rewards/rejected": -1.0368682146072388, + "eval_runtime": 197.153, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 1710 + }, + { + "epoch": 0.23, + "learning_rate": 4.765769467591626e-06, + "logits/chosen": -2.85074520111084, + "logits/rejected": -2.835679769515991, + "logps/chosen": -432.10015869140625, + "logps/rejected": -445.62615966796875, + "loss": 0.5897, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.7564738988876343, + "rewards/margins": 0.3326117694377899, + "rewards/rejected": -1.0890856981277466, + "step": 1720 + }, + { + "epoch": 0.23, + "eval_logits/chosen": -2.5621254444122314, + "eval_logits/rejected": -2.5779037475585938, + "eval_logps/chosen": -415.5030517578125, + "eval_logps/rejected": -406.2257080078125, + "eval_loss": 0.6412656307220459, + "eval_rewards/accuracies": 0.6349999904632568, + "eval_rewards/chosen": -0.8275365829467773, + "eval_rewards/margins": 0.22362031042575836, + "eval_rewards/rejected": -1.0511568784713745, + "eval_runtime": 197.046, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 1720 + }, + { + "epoch": 0.23, + "learning_rate": 4.760918557793096e-06, + "logits/chosen": -2.8191890716552734, + "logits/rejected": -2.869262933731079, + "logps/chosen": -387.35528564453125, + "logps/rejected": -415.16461181640625, + "loss": 0.6146, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.7838236093521118, + "rewards/margins": 0.272321879863739, + "rewards/rejected": -1.056145429611206, + "step": 1730 + }, + { + "epoch": 0.23, + "eval_logits/chosen": -2.558769702911377, + "eval_logits/rejected": -2.5744855403900146, + "eval_logps/chosen": -418.4196472167969, + "eval_logps/rejected": -409.6731262207031, + "eval_loss": 0.6429142951965332, + "eval_rewards/accuracies": 0.6340000033378601, + "eval_rewards/chosen": -0.8567026853561401, + "eval_rewards/margins": 0.2289285808801651, + "eval_rewards/rejected": -1.0856313705444336, + "eval_runtime": 197.0094, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 1730 + }, + { + "epoch": 0.23, + "learning_rate": 4.756020451236025e-06, + "logits/chosen": -2.7810559272766113, + "logits/rejected": -2.7768383026123047, + "logps/chosen": -457.4143981933594, + "logps/rejected": -444.75103759765625, + "loss": 0.6418, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.8151463270187378, + "rewards/margins": 0.20836929976940155, + "rewards/rejected": -1.0235155820846558, + "step": 1740 + }, + { + "epoch": 0.23, + "eval_logits/chosen": -2.5561814308166504, + "eval_logits/rejected": -2.5724422931671143, + "eval_logps/chosen": -423.25457763671875, + "eval_logps/rejected": -415.39715576171875, + "eval_loss": 0.6437353491783142, + "eval_rewards/accuracies": 0.6294999718666077, + "eval_rewards/chosen": -0.9050517082214355, + "eval_rewards/margins": 0.23781974613666534, + "eval_rewards/rejected": -1.142871618270874, + "eval_runtime": 197.0793, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 1740 + }, + { + "epoch": 0.23, + "learning_rate": 4.751075250168569e-06, + "logits/chosen": -2.835005044937134, + "logits/rejected": -2.7781484127044678, + "logps/chosen": -424.66680908203125, + "logps/rejected": -400.25689697265625, + "loss": 0.6322, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.9134689569473267, + "rewards/margins": 0.2810933589935303, + "rewards/rejected": -1.194562315940857, + "step": 1750 + }, + { + "epoch": 0.23, + "eval_logits/chosen": -2.5711569786071777, + "eval_logits/rejected": -2.58683180809021, + "eval_logps/chosen": -418.1545715332031, + "eval_logps/rejected": -409.7334899902344, + "eval_loss": 0.6412755846977234, + "eval_rewards/accuracies": 0.6340000033378601, + "eval_rewards/chosen": -0.8540514707565308, + "eval_rewards/margins": 0.2321833074092865, + "eval_rewards/rejected": -1.0862348079681396, + "eval_runtime": 197.233, + "eval_samples_per_second": 10.14, + "eval_steps_per_second": 5.07, + "step": 1750 + }, + { + "epoch": 0.23, + "learning_rate": 4.746083057821981e-06, + "logits/chosen": -2.772454023361206, + "logits/rejected": -2.716813802719116, + "logps/chosen": -393.3017578125, + "logps/rejected": -371.563720703125, + "loss": 0.628, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.802009105682373, + "rewards/margins": 0.30267855525016785, + "rewards/rejected": -1.1046876907348633, + "step": 1760 + }, + { + "epoch": 0.23, + "eval_logits/chosen": -2.6006784439086914, + "eval_logits/rejected": -2.6152594089508057, + "eval_logps/chosen": -411.67431640625, + "eval_logps/rejected": -402.28314208984375, + "eval_loss": 0.6368669867515564, + "eval_rewards/accuracies": 0.6309999823570251, + "eval_rewards/chosen": -0.7892491221427917, + "eval_rewards/margins": 0.22248202562332153, + "eval_rewards/rejected": -1.0117310285568237, + "eval_runtime": 197.0626, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 1760 + }, + { + "epoch": 0.23, + "learning_rate": 4.741043978408463e-06, + "logits/chosen": -2.781284809112549, + "logits/rejected": -2.7620162963867188, + "logps/chosen": -382.1107177734375, + "logps/rejected": -418.96221923828125, + "loss": 0.5509, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.6648024916648865, + "rewards/margins": 0.4300464689731598, + "rewards/rejected": -1.0948489904403687, + "step": 1770 + }, + { + "epoch": 0.23, + "eval_logits/chosen": -2.604132652282715, + "eval_logits/rejected": -2.619030475616455, + "eval_logps/chosen": -411.5599365234375, + "eval_logps/rejected": -402.39544677734375, + "eval_loss": 0.6368661522865295, + "eval_rewards/accuracies": 0.6290000081062317, + "eval_rewards/chosen": -0.7881054282188416, + "eval_rewards/margins": 0.22474880516529083, + "eval_rewards/rejected": -1.0128542184829712, + "eval_runtime": 197.048, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 1770 + }, + { + "epoch": 0.23, + "learning_rate": 4.735958117118983e-06, + "logits/chosen": -2.8062703609466553, + "logits/rejected": -2.8215584754943848, + "logps/chosen": -431.8072204589844, + "logps/rejected": -423.52813720703125, + "loss": 0.5959, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.6866645812988281, + "rewards/margins": 0.31908783316612244, + "rewards/rejected": -1.0057523250579834, + "step": 1780 + }, + { + "epoch": 0.23, + "eval_logits/chosen": -2.6018896102905273, + "eval_logits/rejected": -2.617478847503662, + "eval_logps/chosen": -407.46307373046875, + "eval_logps/rejected": -397.8551940917969, + "eval_loss": 0.6371034979820251, + "eval_rewards/accuracies": 0.6294999718666077, + "eval_rewards/chosen": -0.747136116027832, + "eval_rewards/margins": 0.2203156054019928, + "eval_rewards/rejected": -0.9674516320228577, + "eval_runtime": 196.9701, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 1780 + }, + { + "epoch": 0.23, + "learning_rate": 4.730825580121084e-06, + "logits/chosen": -2.8423948287963867, + "logits/rejected": -2.8654932975769043, + "logps/chosen": -381.26007080078125, + "logps/rejected": -395.092529296875, + "loss": 0.6137, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.7086172699928284, + "rewards/margins": 0.26496243476867676, + "rewards/rejected": -0.9735797047615051, + "step": 1790 + }, + { + "epoch": 0.23, + "eval_logits/chosen": -2.5999481678009033, + "eval_logits/rejected": -2.615683078765869, + "eval_logps/chosen": -411.8017272949219, + "eval_logps/rejected": -402.99560546875, + "eval_loss": 0.6369568109512329, + "eval_rewards/accuracies": 0.6265000104904175, + "eval_rewards/chosen": -0.7905230522155762, + "eval_rewards/margins": 0.22833256423473358, + "eval_rewards/rejected": -1.0188556909561157, + "eval_runtime": 197.0514, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 1790 + }, + { + "epoch": 0.24, + "learning_rate": 4.725646474556666e-06, + "logits/chosen": -2.830599069595337, + "logits/rejected": -2.8327298164367676, + "logps/chosen": -359.54388427734375, + "logps/rejected": -399.54827880859375, + "loss": 0.6518, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.7748910188674927, + "rewards/margins": 0.28433313965797424, + "rewards/rejected": -1.059224247932434, + "step": 1800 + }, + { + "epoch": 0.24, + "eval_logits/chosen": -2.5959720611572266, + "eval_logits/rejected": -2.612139940261841, + "eval_logps/chosen": -415.753662109375, + "eval_logps/rejected": -407.6683349609375, + "eval_loss": 0.6366816759109497, + "eval_rewards/accuracies": 0.6309999823570251, + "eval_rewards/chosen": -0.8300423622131348, + "eval_rewards/margins": 0.23554080724716187, + "eval_rewards/rejected": -1.0655831098556519, + "eval_runtime": 196.8886, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 1800 + }, + { + "epoch": 0.24, + "learning_rate": 4.720420908539748e-06, + "logits/chosen": -2.840127468109131, + "logits/rejected": -2.816035509109497, + "logps/chosen": -392.6610107421875, + "logps/rejected": -403.17266845703125, + "loss": 0.6444, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.8145529627799988, + "rewards/margins": 0.18399588763713837, + "rewards/rejected": -0.9985488653182983, + "step": 1810 + }, + { + "epoch": 0.24, + "eval_logits/chosen": -2.5939080715179443, + "eval_logits/rejected": -2.610529899597168, + "eval_logps/chosen": -414.0361022949219, + "eval_logps/rejected": -405.6206970214844, + "eval_loss": 0.635891854763031, + "eval_rewards/accuracies": 0.6315000057220459, + "eval_rewards/chosen": -0.8128669857978821, + "eval_rewards/margins": 0.23223945498466492, + "eval_rewards/rejected": -1.0451064109802246, + "eval_runtime": 196.9676, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 1810 + }, + { + "epoch": 0.24, + "learning_rate": 4.715148991154216e-06, + "logits/chosen": -2.904259204864502, + "logits/rejected": -2.9085910320281982, + "logps/chosen": -504.03497314453125, + "logps/rejected": -511.499755859375, + "loss": 0.645, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.8387717008590698, + "rewards/margins": 0.20440442860126495, + "rewards/rejected": -1.0431760549545288, + "step": 1820 + }, + { + "epoch": 0.24, + "eval_logits/chosen": -2.588139533996582, + "eval_logits/rejected": -2.6048943996429443, + "eval_logps/chosen": -413.64398193359375, + "eval_logps/rejected": -405.2970886230469, + "eval_loss": 0.63616943359375, + "eval_rewards/accuracies": 0.6334999799728394, + "eval_rewards/chosen": -0.80894535779953, + "eval_rewards/margins": 0.2329251766204834, + "eval_rewards/rejected": -1.0418705940246582, + "eval_runtime": 197.0152, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.076, + "step": 1820 + }, + { + "epoch": 0.24, + "learning_rate": 4.709830832451538e-06, + "logits/chosen": -2.843167781829834, + "logits/rejected": -2.848705768585205, + "logps/chosen": -467.8697814941406, + "logps/rejected": -467.73309326171875, + "loss": 0.6188, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.8765204548835754, + "rewards/margins": 0.2673446536064148, + "rewards/rejected": -1.1438651084899902, + "step": 1830 + }, + { + "epoch": 0.24, + "eval_logits/chosen": -2.5781211853027344, + "eval_logits/rejected": -2.595245122909546, + "eval_logps/chosen": -419.8053894042969, + "eval_logps/rejected": -412.53533935546875, + "eval_loss": 0.6371971368789673, + "eval_rewards/accuracies": 0.6265000104904175, + "eval_rewards/chosen": -0.8705599308013916, + "eval_rewards/margins": 0.24369306862354279, + "eval_rewards/rejected": -1.1142529249191284, + "eval_runtime": 196.9225, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 1830 + }, + { + "epoch": 0.24, + "learning_rate": 4.704466543448477e-06, + "logits/chosen": -2.710594654083252, + "logits/rejected": -2.70381498336792, + "logps/chosen": -495.4413146972656, + "logps/rejected": -459.764404296875, + "loss": 0.597, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.8346297144889832, + "rewards/margins": 0.32266736030578613, + "rewards/rejected": -1.1572970151901245, + "step": 1840 + }, + { + "epoch": 0.24, + "eval_logits/chosen": -2.572765350341797, + "eval_logits/rejected": -2.590297222137451, + "eval_logps/chosen": -416.5643005371094, + "eval_logps/rejected": -409.066650390625, + "eval_loss": 0.6378411650657654, + "eval_rewards/accuracies": 0.6244999766349792, + "eval_rewards/chosen": -0.8381485939025879, + "eval_rewards/margins": 0.24141810834407806, + "eval_rewards/rejected": -1.0795667171478271, + "eval_runtime": 197.3294, + "eval_samples_per_second": 10.135, + "eval_steps_per_second": 5.068, + "step": 1840 + }, + { + "epoch": 0.24, + "learning_rate": 4.699056236124762e-06, + "logits/chosen": -2.7791919708251953, + "logits/rejected": -2.8077704906463623, + "logps/chosen": -398.36260986328125, + "logps/rejected": -419.29071044921875, + "loss": 0.6169, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.7714325189590454, + "rewards/margins": 0.26381996273994446, + "rewards/rejected": -1.0352524518966675, + "step": 1850 + }, + { + "epoch": 0.24, + "eval_logits/chosen": -2.5729434490203857, + "eval_logits/rejected": -2.590017080307007, + "eval_logps/chosen": -417.69183349609375, + "eval_logps/rejected": -410.3074645996094, + "eval_loss": 0.6380077600479126, + "eval_rewards/accuracies": 0.6255000233650208, + "eval_rewards/chosen": -0.8494245409965515, + "eval_rewards/margins": 0.24255014955997467, + "eval_rewards/rejected": -1.0919746160507202, + "eval_runtime": 197.0483, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 1850 + }, + { + "epoch": 0.24, + "learning_rate": 4.693600023420758e-06, + "logits/chosen": -2.8519492149353027, + "logits/rejected": -2.817288875579834, + "logps/chosen": -445.31585693359375, + "logps/rejected": -393.69781494140625, + "loss": 0.5578, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.7918586730957031, + "rewards/margins": 0.4510478973388672, + "rewards/rejected": -1.2429064512252808, + "step": 1860 + }, + { + "epoch": 0.24, + "eval_logits/chosen": -2.564570665359497, + "eval_logits/rejected": -2.5810608863830566, + "eval_logps/chosen": -421.4671630859375, + "eval_logps/rejected": -414.6640625, + "eval_loss": 0.6403253078460693, + "eval_rewards/accuracies": 0.6255000233650208, + "eval_rewards/chosen": -0.8871776461601257, + "eval_rewards/margins": 0.24836279451847076, + "eval_rewards/rejected": -1.1355403661727905, + "eval_runtime": 197.0186, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.076, + "step": 1860 + }, + { + "epoch": 0.24, + "learning_rate": 4.688098019235108e-06, + "logits/chosen": -2.7748916149139404, + "logits/rejected": -2.7554211616516113, + "logps/chosen": -453.495361328125, + "logps/rejected": -460.4736328125, + "loss": 0.6017, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.8699310421943665, + "rewards/margins": 0.34119826555252075, + "rewards/rejected": -1.2111294269561768, + "step": 1870 + }, + { + "epoch": 0.24, + "eval_logits/chosen": -2.5702080726623535, + "eval_logits/rejected": -2.5860989093780518, + "eval_logps/chosen": -423.2950134277344, + "eval_logps/rejected": -416.7840576171875, + "eval_loss": 0.6397432088851929, + "eval_rewards/accuracies": 0.6244999766349792, + "eval_rewards/chosen": -0.9054557085037231, + "eval_rewards/margins": 0.25128448009490967, + "eval_rewards/rejected": -1.1567401885986328, + "eval_runtime": 197.0154, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.076, + "step": 1870 + }, + { + "epoch": 0.25, + "learning_rate": 4.682550338422353e-06, + "logits/chosen": -2.7921640872955322, + "logits/rejected": -2.791607618331909, + "logps/chosen": -424.34735107421875, + "logps/rejected": -395.5057373046875, + "loss": 0.6193, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.9227128028869629, + "rewards/margins": 0.2798658609390259, + "rewards/rejected": -1.2025786638259888, + "step": 1880 + }, + { + "epoch": 0.25, + "eval_logits/chosen": -2.5783560276031494, + "eval_logits/rejected": -2.594203472137451, + "eval_logps/chosen": -423.9530944824219, + "eval_logps/rejected": -417.4391784667969, + "eval_loss": 0.6378757357597351, + "eval_rewards/accuracies": 0.628000020980835, + "eval_rewards/chosen": -0.9120365977287292, + "eval_rewards/margins": 0.2512553036212921, + "eval_rewards/rejected": -1.1632920503616333, + "eval_runtime": 196.9999, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 1880 + }, + { + "epoch": 0.25, + "learning_rate": 4.676957096790536e-06, + "logits/chosen": -2.652641773223877, + "logits/rejected": -2.653254985809326, + "logps/chosen": -421.16961669921875, + "logps/rejected": -393.53240966796875, + "loss": 0.6376, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.8513861894607544, + "rewards/margins": 0.23414048552513123, + "rewards/rejected": -1.085526704788208, + "step": 1890 + }, + { + "epoch": 0.25, + "eval_logits/chosen": -2.5858771800994873, + "eval_logits/rejected": -2.6016323566436768, + "eval_logps/chosen": -427.589111328125, + "eval_logps/rejected": -421.1734924316406, + "eval_loss": 0.6371917724609375, + "eval_rewards/accuracies": 0.6305000185966492, + "eval_rewards/chosen": -0.9483969211578369, + "eval_rewards/margins": 0.2522384226322174, + "eval_rewards/rejected": -1.2006351947784424, + "eval_runtime": 196.8632, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.08, + "step": 1890 + }, + { + "epoch": 0.25, + "learning_rate": 4.671318411098782e-06, + "logits/chosen": -2.721386432647705, + "logits/rejected": -2.8073456287384033, + "logps/chosen": -433.65435791015625, + "logps/rejected": -459.4867248535156, + "loss": 0.6282, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.8606206178665161, + "rewards/margins": 0.3143337070941925, + "rewards/rejected": -1.1749542951583862, + "step": 1900 + }, + { + "epoch": 0.25, + "eval_logits/chosen": -2.5904347896575928, + "eval_logits/rejected": -2.606128215789795, + "eval_logps/chosen": -426.4163513183594, + "eval_logps/rejected": -419.6851806640625, + "eval_loss": 0.6361418962478638, + "eval_rewards/accuracies": 0.6334999799728394, + "eval_rewards/chosen": -0.9366695284843445, + "eval_rewards/margins": 0.24908219277858734, + "eval_rewards/rejected": -1.1857519149780273, + "eval_runtime": 196.9151, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.078, + "step": 1900 + }, + { + "epoch": 0.25, + "learning_rate": 4.665634399054864e-06, + "logits/chosen": -2.705906867980957, + "logits/rejected": -2.770385980606079, + "logps/chosen": -397.36676025390625, + "logps/rejected": -405.4843444824219, + "loss": 0.6556, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.9889096021652222, + "rewards/margins": 0.21706286072731018, + "rewards/rejected": -1.2059725522994995, + "step": 1910 + }, + { + "epoch": 0.25, + "eval_logits/chosen": -2.5925650596618652, + "eval_logits/rejected": -2.608245611190796, + "eval_logps/chosen": -424.7822570800781, + "eval_logps/rejected": -417.7047424316406, + "eval_loss": 0.6357632875442505, + "eval_rewards/accuracies": 0.6315000057220459, + "eval_rewards/chosen": -0.9203288555145264, + "eval_rewards/margins": 0.24561835825443268, + "eval_rewards/rejected": -1.1659470796585083, + "eval_runtime": 197.5903, + "eval_samples_per_second": 10.122, + "eval_steps_per_second": 5.061, + "step": 1910 + }, + { + "epoch": 0.25, + "learning_rate": 4.659905179312743e-06, + "logits/chosen": -2.8598313331604004, + "logits/rejected": -2.8456664085388184, + "logps/chosen": -448.54425048828125, + "logps/rejected": -401.2884521484375, + "loss": 0.6259, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.866184413433075, + "rewards/margins": 0.26407763361930847, + "rewards/rejected": -1.130262017250061, + "step": 1920 + }, + { + "epoch": 0.25, + "eval_logits/chosen": -2.6057279109954834, + "eval_logits/rejected": -2.6205661296844482, + "eval_logps/chosen": -417.2929382324219, + "eval_logps/rejected": -409.1140441894531, + "eval_loss": 0.6337299942970276, + "eval_rewards/accuracies": 0.6340000033378601, + "eval_rewards/chosen": -0.8454354405403137, + "eval_rewards/margins": 0.23460477590560913, + "eval_rewards/rejected": -1.0800403356552124, + "eval_runtime": 197.1043, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.073, + "step": 1920 + }, + { + "epoch": 0.25, + "learning_rate": 4.654130871470093e-06, + "logits/chosen": -2.7806954383850098, + "logits/rejected": -2.756470203399658, + "logps/chosen": -415.10272216796875, + "logps/rejected": -368.8055114746094, + "loss": 0.7005, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.8875184059143066, + "rewards/margins": 0.07059729844331741, + "rewards/rejected": -0.9581157565116882, + "step": 1930 + }, + { + "epoch": 0.25, + "eval_logits/chosen": -2.6222054958343506, + "eval_logits/rejected": -2.636209487915039, + "eval_logps/chosen": -413.4906005859375, + "eval_logps/rejected": -404.2983703613281, + "eval_loss": 0.6328663229942322, + "eval_rewards/accuracies": 0.6389999985694885, + "eval_rewards/chosen": -0.807411789894104, + "eval_rewards/margins": 0.22447140514850616, + "eval_rewards/rejected": -1.0318833589553833, + "eval_runtime": 197.0477, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 1930 + }, + { + "epoch": 0.25, + "learning_rate": 4.6483115960658045e-06, + "logits/chosen": -2.877629518508911, + "logits/rejected": -2.865546464920044, + "logps/chosen": -413.8694763183594, + "logps/rejected": -342.9363098144531, + "loss": 0.6331, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.7345770597457886, + "rewards/margins": 0.18642066419124603, + "rewards/rejected": -0.9209977388381958, + "step": 1940 + }, + { + "epoch": 0.25, + "eval_logits/chosen": -2.629322052001953, + "eval_logits/rejected": -2.6429662704467773, + "eval_logps/chosen": -411.325927734375, + "eval_logps/rejected": -401.53961181640625, + "eval_loss": 0.6325713992118835, + "eval_rewards/accuracies": 0.6349999904632568, + "eval_rewards/chosen": -0.7857657074928284, + "eval_rewards/margins": 0.21853068470954895, + "eval_rewards/rejected": -1.0042963027954102, + "eval_runtime": 197.0888, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 1940 + }, + { + "epoch": 0.26, + "learning_rate": 4.642447474577466e-06, + "logits/chosen": -2.7526779174804688, + "logits/rejected": -2.7635135650634766, + "logps/chosen": -373.68670654296875, + "logps/rejected": -378.1413269042969, + "loss": 0.6362, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.799132227897644, + "rewards/margins": 0.19670510292053223, + "rewards/rejected": -0.9958373308181763, + "step": 1950 + }, + { + "epoch": 0.26, + "eval_logits/chosen": -2.6297872066497803, + "eval_logits/rejected": -2.64349627494812, + "eval_logps/chosen": -410.59429931640625, + "eval_logps/rejected": -400.9759521484375, + "eval_loss": 0.6317591667175293, + "eval_rewards/accuracies": 0.637499988079071, + "eval_rewards/chosen": -0.7784488201141357, + "eval_rewards/margins": 0.22021029889583588, + "eval_rewards/rejected": -0.9986591339111328, + "eval_runtime": 197.0825, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 1950 + }, + { + "epoch": 0.26, + "learning_rate": 4.636538629418832e-06, + "logits/chosen": -2.811131715774536, + "logits/rejected": -2.8222975730895996, + "logps/chosen": -440.174560546875, + "logps/rejected": -429.7535095214844, + "loss": 0.5862, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.7554556131362915, + "rewards/margins": 0.3168772757053375, + "rewards/rejected": -1.0723329782485962, + "step": 1960 + }, + { + "epoch": 0.26, + "eval_logits/chosen": -2.620647668838501, + "eval_logits/rejected": -2.634829044342041, + "eval_logps/chosen": -415.5986328125, + "eval_logps/rejected": -407.06268310546875, + "eval_loss": 0.6318819522857666, + "eval_rewards/accuracies": 0.6355000138282776, + "eval_rewards/chosen": -0.8284925222396851, + "eval_rewards/margins": 0.23103398084640503, + "eval_rewards/rejected": -1.0595263242721558, + "eval_runtime": 197.1465, + "eval_samples_per_second": 10.145, + "eval_steps_per_second": 5.072, + "step": 1960 + }, + { + "epoch": 0.26, + "learning_rate": 4.630585183937263e-06, + "logits/chosen": -2.806405544281006, + "logits/rejected": -2.7973275184631348, + "logps/chosen": -413.4725646972656, + "logps/rejected": -394.82708740234375, + "loss": 0.6907, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.74481201171875, + "rewards/margins": 0.07658366113901138, + "rewards/rejected": -0.8213956952095032, + "step": 1970 + }, + { + "epoch": 0.26, + "eval_logits/chosen": -2.615365743637085, + "eval_logits/rejected": -2.6301496028900146, + "eval_logps/chosen": -410.3633117675781, + "eval_logps/rejected": -401.2737731933594, + "eval_loss": 0.6315578818321228, + "eval_rewards/accuracies": 0.6345000267028809, + "eval_rewards/chosen": -0.7761390209197998, + "eval_rewards/margins": 0.2254989594221115, + "eval_rewards/rejected": -1.0016380548477173, + "eval_runtime": 197.0852, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 1970 + }, + { + "epoch": 0.26, + "learning_rate": 4.6245872624111535e-06, + "logits/chosen": -2.8345344066619873, + "logits/rejected": -2.8294196128845215, + "logps/chosen": -349.9237060546875, + "logps/rejected": -348.3368835449219, + "loss": 0.6349, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.6936440467834473, + "rewards/margins": 0.2063537836074829, + "rewards/rejected": -0.8999978303909302, + "step": 1980 + }, + { + "epoch": 0.26, + "eval_logits/chosen": -2.6132729053497314, + "eval_logits/rejected": -2.6283042430877686, + "eval_logps/chosen": -407.8918151855469, + "eval_logps/rejected": -398.5971374511719, + "eval_loss": 0.6315102577209473, + "eval_rewards/accuracies": 0.6330000162124634, + "eval_rewards/chosen": -0.751424252986908, + "eval_rewards/margins": 0.22344675660133362, + "eval_rewards/rejected": -0.9748709797859192, + "eval_runtime": 197.0545, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 1980 + }, + { + "epoch": 0.26, + "learning_rate": 4.618544990047336e-06, + "logits/chosen": -2.8143086433410645, + "logits/rejected": -2.787330150604248, + "logps/chosen": -453.98297119140625, + "logps/rejected": -445.3204040527344, + "loss": 0.618, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.781507134437561, + "rewards/margins": 0.27939194440841675, + "rewards/rejected": -1.060899019241333, + "step": 1990 + }, + { + "epoch": 0.26, + "eval_logits/chosen": -2.6105549335479736, + "eval_logits/rejected": -2.625771999359131, + "eval_logps/chosen": -417.1804504394531, + "eval_logps/rejected": -409.4155578613281, + "eval_loss": 0.6315101385116577, + "eval_rewards/accuracies": 0.6414999961853027, + "eval_rewards/chosen": -0.8443105220794678, + "eval_rewards/margins": 0.23874500393867493, + "eval_rewards/rejected": -1.0830554962158203, + "eval_runtime": 197.0596, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 1990 + }, + { + "epoch": 0.26, + "learning_rate": 4.612458492978473e-06, + "logits/chosen": -2.8706493377685547, + "logits/rejected": -2.8462719917297363, + "logps/chosen": -397.9223327636719, + "logps/rejected": -415.9630432128906, + "loss": 0.6924, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.9325806498527527, + "rewards/margins": 0.136087566614151, + "rewards/rejected": -1.0686681270599365, + "step": 2000 + }, + { + "epoch": 0.26, + "eval_logits/chosen": -2.6107311248779297, + "eval_logits/rejected": -2.6258249282836914, + "eval_logps/chosen": -417.8204345703125, + "eval_logps/rejected": -410.0538330078125, + "eval_loss": 0.6316912174224854, + "eval_rewards/accuracies": 0.6389999985694885, + "eval_rewards/chosen": -0.8507106900215149, + "eval_rewards/margins": 0.23872776329517365, + "eval_rewards/rejected": -1.0894384384155273, + "eval_runtime": 197.0063, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 2000 + }, + { + "epoch": 0.26, + "learning_rate": 4.606327898260413e-06, + "logits/chosen": -2.686081647872925, + "logits/rejected": -2.7080864906311035, + "logps/chosen": -447.25384521484375, + "logps/rejected": -430.2577209472656, + "loss": 0.6461, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.8990098237991333, + "rewards/margins": 0.2516574263572693, + "rewards/rejected": -1.1506671905517578, + "step": 2010 + }, + { + "epoch": 0.26, + "eval_logits/chosen": -2.612136125564575, + "eval_logits/rejected": -2.62685227394104, + "eval_logps/chosen": -412.8479309082031, + "eval_logps/rejected": -404.4166259765625, + "eval_loss": 0.630695641040802, + "eval_rewards/accuracies": 0.640500009059906, + "eval_rewards/chosen": -0.8009849786758423, + "eval_rewards/margins": 0.2320813089609146, + "eval_rewards/rejected": -1.0330662727355957, + "eval_runtime": 196.7936, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 5.081, + "step": 2010 + }, + { + "epoch": 0.26, + "learning_rate": 4.600153333869549e-06, + "logits/chosen": -2.8086211681365967, + "logits/rejected": -2.819854736328125, + "logps/chosen": -422.72161865234375, + "logps/rejected": -394.31787109375, + "loss": 0.6233, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.7718098163604736, + "rewards/margins": 0.24191728234291077, + "rewards/rejected": -1.013727068901062, + "step": 2020 + }, + { + "epoch": 0.26, + "eval_logits/chosen": -2.609605073928833, + "eval_logits/rejected": -2.624340772628784, + "eval_logps/chosen": -409.9208068847656, + "eval_logps/rejected": -401.1809997558594, + "eval_loss": 0.6306189298629761, + "eval_rewards/accuracies": 0.640500009059906, + "eval_rewards/chosen": -0.7717139720916748, + "eval_rewards/margins": 0.22899581491947174, + "eval_rewards/rejected": -1.000709891319275, + "eval_runtime": 196.9939, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.076, + "step": 2020 + }, + { + "epoch": 0.27, + "learning_rate": 4.593934928700141e-06, + "logits/chosen": -2.841212749481201, + "logits/rejected": -2.8480188846588135, + "logps/chosen": -415.624755859375, + "logps/rejected": -377.48773193359375, + "loss": 0.6237, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.7407634854316711, + "rewards/margins": 0.24683237075805664, + "rewards/rejected": -0.9875958561897278, + "step": 2030 + }, + { + "epoch": 0.27, + "eval_logits/chosen": -2.607243299484253, + "eval_logits/rejected": -2.6218373775482178, + "eval_logps/chosen": -406.3677978515625, + "eval_logps/rejected": -397.22369384765625, + "eval_loss": 0.630490243434906, + "eval_rewards/accuracies": 0.6380000114440918, + "eval_rewards/chosen": -0.736184298992157, + "eval_rewards/margins": 0.22495214641094208, + "eval_rewards/rejected": -0.961136519908905, + "eval_runtime": 196.961, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 2030 + }, + { + "epoch": 0.27, + "learning_rate": 4.587672812561626e-06, + "logits/chosen": -2.81145977973938, + "logits/rejected": -2.781007766723633, + "logps/chosen": -369.285400390625, + "logps/rejected": -425.7210998535156, + "loss": 0.5939, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.7476687431335449, + "rewards/margins": 0.28507062792778015, + "rewards/rejected": -1.0327394008636475, + "step": 2040 + }, + { + "epoch": 0.27, + "eval_logits/chosen": -2.6005775928497314, + "eval_logits/rejected": -2.6154563426971436, + "eval_logps/chosen": -408.3466796875, + "eval_logps/rejected": -399.87847900390625, + "eval_loss": 0.6307638883590698, + "eval_rewards/accuracies": 0.6384999752044678, + "eval_rewards/chosen": -0.7559728622436523, + "eval_rewards/margins": 0.23171177506446838, + "eval_rewards/rejected": -0.9876845479011536, + "eval_runtime": 196.9073, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 2040 + }, + { + "epoch": 0.27, + "learning_rate": 4.581367116175911e-06, + "logits/chosen": -2.7396187782287598, + "logits/rejected": -2.731571912765503, + "logps/chosen": -433.5108337402344, + "logps/rejected": -405.5694885253906, + "loss": 0.5974, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.693037748336792, + "rewards/margins": 0.31083375215530396, + "rewards/rejected": -1.0038714408874512, + "step": 2050 + }, + { + "epoch": 0.27, + "eval_logits/chosen": -2.5866856575012207, + "eval_logits/rejected": -2.6025893688201904, + "eval_logps/chosen": -409.65179443359375, + "eval_logps/rejected": -401.7351379394531, + "eval_loss": 0.6328474283218384, + "eval_rewards/accuracies": 0.6389999985694885, + "eval_rewards/chosen": -0.769023597240448, + "eval_rewards/margins": 0.2372276335954666, + "eval_rewards/rejected": -1.0062512159347534, + "eval_runtime": 197.2625, + "eval_samples_per_second": 10.139, + "eval_steps_per_second": 5.069, + "step": 2050 + }, + { + "epoch": 0.27, + "learning_rate": 4.5750179711746416e-06, + "logits/chosen": -2.7967312335968018, + "logits/rejected": -2.7692575454711914, + "logps/chosen": -399.40399169921875, + "logps/rejected": -404.92596435546875, + "loss": 0.6569, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.7823539972305298, + "rewards/margins": 0.16327540576457977, + "rewards/rejected": -0.9456294178962708, + "step": 2060 + }, + { + "epoch": 0.27, + "eval_logits/chosen": -2.5818114280700684, + "eval_logits/rejected": -2.598083734512329, + "eval_logps/chosen": -414.1978759765625, + "eval_logps/rejected": -406.9813537597656, + "eval_loss": 0.6336009502410889, + "eval_rewards/accuracies": 0.6370000243186951, + "eval_rewards/chosen": -0.8144845962524414, + "eval_rewards/margins": 0.24422858655452728, + "eval_rewards/rejected": -1.0587131977081299, + "eval_runtime": 196.8703, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.079, + "step": 2060 + }, + { + "epoch": 0.27, + "learning_rate": 4.5686255100964535e-06, + "logits/chosen": -2.845377206802368, + "logits/rejected": -2.8053154945373535, + "logps/chosen": -410.73785400390625, + "logps/rejected": -380.6125183105469, + "loss": 0.6322, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.8289012908935547, + "rewards/margins": 0.2113029509782791, + "rewards/rejected": -1.0402042865753174, + "step": 2070 + }, + { + "epoch": 0.27, + "eval_logits/chosen": -2.5877645015716553, + "eval_logits/rejected": -2.604356527328491, + "eval_logps/chosen": -417.6963195800781, + "eval_logps/rejected": -411.0251770019531, + "eval_loss": 0.6324384212493896, + "eval_rewards/accuracies": 0.640999972820282, + "eval_rewards/chosen": -0.8494692444801331, + "eval_rewards/margins": 0.24968257546424866, + "eval_rewards/rejected": -1.0991517305374146, + "eval_runtime": 196.9005, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 2070 + }, + { + "epoch": 0.27, + "learning_rate": 4.562189866384209e-06, + "logits/chosen": -2.691206932067871, + "logits/rejected": -2.7267496585845947, + "logps/chosen": -375.44580078125, + "logps/rejected": -422.0435485839844, + "loss": 0.6262, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.8189651370048523, + "rewards/margins": 0.2774657607078552, + "rewards/rejected": -1.0964308977127075, + "step": 2080 + }, + { + "epoch": 0.27, + "eval_logits/chosen": -2.592376470565796, + "eval_logits/rejected": -2.608642101287842, + "eval_logps/chosen": -422.14459228515625, + "eval_logps/rejected": -415.8671569824219, + "eval_loss": 0.6311394572257996, + "eval_rewards/accuracies": 0.6399999856948853, + "eval_rewards/chosen": -0.8939514756202698, + "eval_rewards/margins": 0.25362005829811096, + "eval_rewards/rejected": -1.1475715637207031, + "eval_runtime": 197.0619, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 2080 + }, + { + "epoch": 0.27, + "learning_rate": 4.555711174382209e-06, + "logits/chosen": -2.811758518218994, + "logits/rejected": -2.8001110553741455, + "logps/chosen": -375.446533203125, + "logps/rejected": -360.69464111328125, + "loss": 0.6663, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.8602222204208374, + "rewards/margins": 0.16579048335552216, + "rewards/rejected": -1.026012659072876, + "step": 2090 + }, + { + "epoch": 0.27, + "eval_logits/chosen": -2.5941474437713623, + "eval_logits/rejected": -2.6107828617095947, + "eval_logps/chosen": -421.90533447265625, + "eval_logps/rejected": -415.3799133300781, + "eval_loss": 0.6304261684417725, + "eval_rewards/accuracies": 0.6424999833106995, + "eval_rewards/chosen": -0.8915589451789856, + "eval_rewards/margins": 0.25113990902900696, + "eval_rewards/rejected": -1.142698884010315, + "eval_runtime": 197.0368, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 2090 + }, + { + "epoch": 0.27, + "learning_rate": 4.549189569333387e-06, + "logits/chosen": -2.784393787384033, + "logits/rejected": -2.711235284805298, + "logps/chosen": -375.9978332519531, + "logps/rejected": -356.5938415527344, + "loss": 0.6222, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.8262361288070679, + "rewards/margins": 0.24428649246692657, + "rewards/rejected": -1.0705227851867676, + "step": 2100 + }, + { + "epoch": 0.27, + "eval_logits/chosen": -2.5947983264923096, + "eval_logits/rejected": -2.6116442680358887, + "eval_logps/chosen": -422.1054382324219, + "eval_logps/rejected": -415.39764404296875, + "eval_loss": 0.6300971508026123, + "eval_rewards/accuracies": 0.6384999752044678, + "eval_rewards/chosen": -0.8935604691505432, + "eval_rewards/margins": 0.2493157833814621, + "eval_rewards/rejected": -1.1428762674331665, + "eval_runtime": 196.9277, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 2100 + }, + { + "epoch": 0.28, + "learning_rate": 4.542625187376491e-06, + "logits/chosen": -2.7952916622161865, + "logits/rejected": -2.7755210399627686, + "logps/chosen": -446.38494873046875, + "logps/rejected": -415.366455078125, + "loss": 0.6496, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.8958450555801392, + "rewards/margins": 0.19885332882404327, + "rewards/rejected": -1.0946983098983765, + "step": 2110 + }, + { + "epoch": 0.28, + "eval_logits/chosen": -2.593679904937744, + "eval_logits/rejected": -2.610772132873535, + "eval_logps/chosen": -418.0361022949219, + "eval_logps/rejected": -410.80035400390625, + "eval_loss": 0.629709005355835, + "eval_rewards/accuracies": 0.640999972820282, + "eval_rewards/chosen": -0.8528667092323303, + "eval_rewards/margins": 0.24403661489486694, + "eval_rewards/rejected": -1.0969033241271973, + "eval_runtime": 196.9676, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 2110 + }, + { + "epoch": 0.28, + "learning_rate": 4.536018165543239e-06, + "logits/chosen": -2.8523917198181152, + "logits/rejected": -2.8088977336883545, + "logps/chosen": -459.11102294921875, + "logps/rejected": -462.03546142578125, + "loss": 0.6135, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.8140150308609009, + "rewards/margins": 0.28719818592071533, + "rewards/rejected": -1.1012132167816162, + "step": 2120 + }, + { + "epoch": 0.28, + "eval_logits/chosen": -2.589694023132324, + "eval_logits/rejected": -2.606966257095337, + "eval_logps/chosen": -416.22003173828125, + "eval_logps/rejected": -408.8890380859375, + "eval_loss": 0.6295616626739502, + "eval_rewards/accuracies": 0.6389999985694885, + "eval_rewards/chosen": -0.8347060680389404, + "eval_rewards/margins": 0.24308432638645172, + "eval_rewards/rejected": -1.0777904987335205, + "eval_runtime": 196.8827, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 2120 + }, + { + "epoch": 0.28, + "learning_rate": 4.529368641755453e-06, + "logits/chosen": -2.8522391319274902, + "logits/rejected": -2.889514923095703, + "logps/chosen": -359.7933349609375, + "logps/rejected": -378.53997802734375, + "loss": 0.65, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.868852436542511, + "rewards/margins": 0.2264028787612915, + "rewards/rejected": -1.0952553749084473, + "step": 2130 + }, + { + "epoch": 0.28, + "eval_logits/chosen": -2.5772836208343506, + "eval_logits/rejected": -2.594741106033325, + "eval_logps/chosen": -421.50567626953125, + "eval_logps/rejected": -415.184814453125, + "eval_loss": 0.630107045173645, + "eval_rewards/accuracies": 0.640500009059906, + "eval_rewards/chosen": -0.8875633478164673, + "eval_rewards/margins": 0.2531849145889282, + "eval_rewards/rejected": -1.1407482624053955, + "eval_runtime": 196.7983, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 5.081, + "step": 2130 + }, + { + "epoch": 0.28, + "learning_rate": 4.522676754822189e-06, + "logits/chosen": -2.7324087619781494, + "logits/rejected": -2.6535348892211914, + "logps/chosen": -436.89208984375, + "logps/rejected": -360.9748229980469, + "loss": 0.6562, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.9051302075386047, + "rewards/margins": 0.21084150671958923, + "rewards/rejected": -1.1159718036651611, + "step": 2140 + }, + { + "epoch": 0.28, + "eval_logits/chosen": -2.58268666267395, + "eval_logits/rejected": -2.5994439125061035, + "eval_logps/chosen": -419.2276916503906, + "eval_logps/rejected": -412.3636169433594, + "eval_loss": 0.6285167932510376, + "eval_rewards/accuracies": 0.6464999914169312, + "eval_rewards/chosen": -0.8647826910018921, + "eval_rewards/margins": 0.2477533221244812, + "eval_rewards/rejected": -1.1125361919403076, + "eval_runtime": 196.7684, + "eval_samples_per_second": 10.164, + "eval_steps_per_second": 5.082, + "step": 2140 + }, + { + "epoch": 0.28, + "learning_rate": 4.515942644436836e-06, + "logits/chosen": -2.78916597366333, + "logits/rejected": -2.79569673538208, + "logps/chosen": -430.1502990722656, + "logps/rejected": -427.21038818359375, + "loss": 0.5989, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.8669744729995728, + "rewards/margins": 0.3714192509651184, + "rewards/rejected": -1.238393783569336, + "step": 2150 + }, + { + "epoch": 0.28, + "eval_logits/chosen": -2.5814082622528076, + "eval_logits/rejected": -2.597965955734253, + "eval_logps/chosen": -421.9512634277344, + "eval_logps/rejected": -415.35882568359375, + "eval_loss": 0.6280709505081177, + "eval_rewards/accuracies": 0.6474999785423279, + "eval_rewards/chosen": -0.8920185565948486, + "eval_rewards/margins": 0.25046926736831665, + "eval_rewards/rejected": -1.1424877643585205, + "eval_runtime": 196.8996, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 2150 + }, + { + "epoch": 0.28, + "learning_rate": 4.509166451174194e-06, + "logits/chosen": -2.8253769874572754, + "logits/rejected": -2.824777364730835, + "logps/chosen": -454.80169677734375, + "logps/rejected": -447.1356506347656, + "loss": 0.6232, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.8632291555404663, + "rewards/margins": 0.23953184485435486, + "rewards/rejected": -1.1027610301971436, + "step": 2160 + }, + { + "epoch": 0.28, + "eval_logits/chosen": -2.5811665058135986, + "eval_logits/rejected": -2.5974154472351074, + "eval_logps/chosen": -426.8110656738281, + "eval_logps/rejected": -420.6875305175781, + "eval_loss": 0.62840735912323, + "eval_rewards/accuracies": 0.6460000276565552, + "eval_rewards/chosen": -0.9406165480613708, + "eval_rewards/margins": 0.25515857338905334, + "eval_rewards/rejected": -1.1957751512527466, + "eval_runtime": 196.7753, + "eval_samples_per_second": 10.164, + "eval_steps_per_second": 5.082, + "step": 2160 + }, + { + "epoch": 0.28, + "learning_rate": 4.502348316487552e-06, + "logits/chosen": -2.7800397872924805, + "logits/rejected": -2.74601411819458, + "logps/chosen": -441.43670654296875, + "logps/rejected": -417.4474182128906, + "loss": 0.6596, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.0352487564086914, + "rewards/margins": 0.19729386270046234, + "rewards/rejected": -1.2325425148010254, + "step": 2170 + }, + { + "epoch": 0.28, + "eval_logits/chosen": -2.584304094314575, + "eval_logits/rejected": -2.5999248027801514, + "eval_logps/chosen": -424.75494384765625, + "eval_logps/rejected": -418.04986572265625, + "eval_loss": 0.6277941465377808, + "eval_rewards/accuracies": 0.6464999914169312, + "eval_rewards/chosen": -0.9200555086135864, + "eval_rewards/margins": 0.24934299290180206, + "eval_rewards/rejected": -1.169398546218872, + "eval_runtime": 196.9617, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 2170 + }, + { + "epoch": 0.29, + "learning_rate": 4.495488382705722e-06, + "logits/chosen": -2.776062488555908, + "logits/rejected": -2.755868434906006, + "logps/chosen": -491.7884216308594, + "logps/rejected": -413.3304138183594, + "loss": 0.5964, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.7972935438156128, + "rewards/margins": 0.30835580825805664, + "rewards/rejected": -1.105649471282959, + "step": 2180 + }, + { + "epoch": 0.29, + "eval_logits/chosen": -2.5931167602539062, + "eval_logits/rejected": -2.6081583499908447, + "eval_logps/chosen": -418.2218933105469, + "eval_logps/rejected": -410.45379638671875, + "eval_loss": 0.6268242597579956, + "eval_rewards/accuracies": 0.6514999866485596, + "eval_rewards/chosen": -0.8547250032424927, + "eval_rewards/margins": 0.23871254920959473, + "eval_rewards/rejected": -1.0934375524520874, + "eval_runtime": 196.9069, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 2180 + }, + { + "epoch": 0.29, + "learning_rate": 4.488586793030075e-06, + "logits/chosen": -2.7607836723327637, + "logits/rejected": -2.716301441192627, + "logps/chosen": -357.68621826171875, + "logps/rejected": -410.98468017578125, + "loss": 0.5492, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -0.7945287227630615, + "rewards/margins": 0.4202180802822113, + "rewards/rejected": -1.2147467136383057, + "step": 2190 + }, + { + "epoch": 0.29, + "eval_logits/chosen": -2.591677188873291, + "eval_logits/rejected": -2.6068708896636963, + "eval_logps/chosen": -417.1944274902344, + "eval_logps/rejected": -409.63494873046875, + "eval_loss": 0.6265187859535217, + "eval_rewards/accuracies": 0.6514999866485596, + "eval_rewards/chosen": -0.8444498181343079, + "eval_rewards/margins": 0.2407991737127304, + "eval_rewards/rejected": -1.0852489471435547, + "eval_runtime": 196.95, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 2190 + }, + { + "epoch": 0.29, + "learning_rate": 4.481643691531551e-06, + "logits/chosen": -2.8239293098449707, + "logits/rejected": -2.846830368041992, + "logps/chosen": -403.1731872558594, + "logps/rejected": -379.99462890625, + "loss": 0.6317, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.7410529851913452, + "rewards/margins": 0.24859830737113953, + "rewards/rejected": -0.9896513223648071, + "step": 2200 + }, + { + "epoch": 0.29, + "eval_logits/chosen": -2.5892865657806396, + "eval_logits/rejected": -2.6051228046417236, + "eval_logps/chosen": -413.1067199707031, + "eval_logps/rejected": -405.3682556152344, + "eval_loss": 0.6259841322898865, + "eval_rewards/accuracies": 0.6520000100135803, + "eval_rewards/chosen": -0.8035732507705688, + "eval_rewards/margins": 0.2390093058347702, + "eval_rewards/rejected": -1.0425825119018555, + "eval_runtime": 197.1506, + "eval_samples_per_second": 10.145, + "eval_steps_per_second": 5.072, + "step": 2200 + }, + { + "epoch": 0.29, + "learning_rate": 4.474659223147652e-06, + "logits/chosen": -2.813742160797119, + "logits/rejected": -2.821537494659424, + "logps/chosen": -422.7051696777344, + "logps/rejected": -411.17791748046875, + "loss": 0.6286, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.8256515264511108, + "rewards/margins": 0.2547362744808197, + "rewards/rejected": -1.080387830734253, + "step": 2210 + }, + { + "epoch": 0.29, + "eval_logits/chosen": -2.582897663116455, + "eval_logits/rejected": -2.5993919372558594, + "eval_logps/chosen": -413.4576110839844, + "eval_logps/rejected": -406.21124267578125, + "eval_loss": 0.625976026058197, + "eval_rewards/accuracies": 0.6445000171661377, + "eval_rewards/chosen": -0.8070821166038513, + "eval_rewards/margins": 0.243929922580719, + "eval_rewards/rejected": -1.0510119199752808, + "eval_runtime": 196.7712, + "eval_samples_per_second": 10.164, + "eval_steps_per_second": 5.082, + "step": 2210 + }, + { + "epoch": 0.29, + "learning_rate": 4.4676335336794125e-06, + "logits/chosen": -2.7268691062927246, + "logits/rejected": -2.7420523166656494, + "logps/chosen": -458.7037048339844, + "logps/rejected": -435.62042236328125, + "loss": 0.6342, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.8187200427055359, + "rewards/margins": 0.21425040066242218, + "rewards/rejected": -1.0329705476760864, + "step": 2220 + }, + { + "epoch": 0.29, + "eval_logits/chosen": -2.5829458236694336, + "eval_logits/rejected": -2.5995917320251465, + "eval_logps/chosen": -415.4107360839844, + "eval_logps/rejected": -408.4854736328125, + "eval_loss": 0.6260092258453369, + "eval_rewards/accuracies": 0.6464999914169312, + "eval_rewards/chosen": -0.8266136646270752, + "eval_rewards/margins": 0.24714109301567078, + "eval_rewards/rejected": -1.0737547874450684, + "eval_runtime": 196.9414, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.078, + "step": 2220 + }, + { + "epoch": 0.29, + "learning_rate": 4.46056676978836e-06, + "logits/chosen": -2.761662006378174, + "logits/rejected": -2.779341697692871, + "logps/chosen": -397.09051513671875, + "logps/rejected": -458.64166259765625, + "loss": 0.6217, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.7701650857925415, + "rewards/margins": 0.2514593005180359, + "rewards/rejected": -1.0216243267059326, + "step": 2230 + }, + { + "epoch": 0.29, + "eval_logits/chosen": -2.5795228481292725, + "eval_logits/rejected": -2.596259117126465, + "eval_logps/chosen": -421.1188049316406, + "eval_logps/rejected": -415.04644775390625, + "eval_loss": 0.6261369585990906, + "eval_rewards/accuracies": 0.6464999914169312, + "eval_rewards/chosen": -0.8836943507194519, + "eval_rewards/margins": 0.25567007064819336, + "eval_rewards/rejected": -1.139364242553711, + "eval_runtime": 197.2042, + "eval_samples_per_second": 10.142, + "eval_steps_per_second": 5.071, + "step": 2230 + }, + { + "epoch": 0.29, + "learning_rate": 4.453459078993453e-06, + "logits/chosen": -2.692732334136963, + "logits/rejected": -2.79284930229187, + "logps/chosen": -395.21856689453125, + "logps/rejected": -419.09454345703125, + "loss": 0.5913, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.8348531723022461, + "rewards/margins": 0.3056022524833679, + "rewards/rejected": -1.1404553651809692, + "step": 2240 + }, + { + "epoch": 0.29, + "eval_logits/chosen": -2.570924758911133, + "eval_logits/rejected": -2.58809757232666, + "eval_logps/chosen": -424.5317687988281, + "eval_logps/rejected": -419.3282470703125, + "eval_loss": 0.6271562576293945, + "eval_rewards/accuracies": 0.6430000066757202, + "eval_rewards/chosen": -0.9178237915039062, + "eval_rewards/margins": 0.26435843110084534, + "eval_rewards/rejected": -1.1821821928024292, + "eval_runtime": 196.9415, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.078, + "step": 2240 + }, + { + "epoch": 0.29, + "learning_rate": 4.446310609668001e-06, + "logits/chosen": -2.659118413925171, + "logits/rejected": -2.699690341949463, + "logps/chosen": -386.1409606933594, + "logps/rejected": -446.634033203125, + "loss": 0.6624, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.9850989580154419, + "rewards/margins": 0.194298654794693, + "rewards/rejected": -1.1793975830078125, + "step": 2250 + }, + { + "epoch": 0.29, + "eval_logits/chosen": -2.570498466491699, + "eval_logits/rejected": -2.5880205631256104, + "eval_logps/chosen": -425.5768127441406, + "eval_logps/rejected": -420.7292785644531, + "eval_loss": 0.6279781460762024, + "eval_rewards/accuracies": 0.6420000195503235, + "eval_rewards/chosen": -0.9282740354537964, + "eval_rewards/margins": 0.26791858673095703, + "eval_rewards/rejected": -1.1961926221847534, + "eval_runtime": 196.9759, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 2250 + }, + { + "epoch": 0.3, + "learning_rate": 4.439121511036562e-06, + "logits/chosen": -2.758730411529541, + "logits/rejected": -2.7282633781433105, + "logps/chosen": -440.101318359375, + "logps/rejected": -413.44891357421875, + "loss": 0.6178, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.8409091234207153, + "rewards/margins": 0.31042739748954773, + "rewards/rejected": -1.151336431503296, + "step": 2260 + }, + { + "epoch": 0.3, + "eval_logits/chosen": -2.575878620147705, + "eval_logits/rejected": -2.5933985710144043, + "eval_logps/chosen": -420.9499816894531, + "eval_logps/rejected": -415.45281982421875, + "eval_loss": 0.628210186958313, + "eval_rewards/accuracies": 0.640999972820282, + "eval_rewards/chosen": -0.8820055723190308, + "eval_rewards/margins": 0.26142239570617676, + "eval_rewards/rejected": -1.143427848815918, + "eval_runtime": 196.9298, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 2260 + }, + { + "epoch": 0.3, + "learning_rate": 4.431891933171839e-06, + "logits/chosen": -2.731050968170166, + "logits/rejected": -2.701270580291748, + "logps/chosen": -414.69268798828125, + "logps/rejected": -407.79150390625, + "loss": 0.6685, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.8784014582633972, + "rewards/margins": 0.18701156973838806, + "rewards/rejected": -1.0654131174087524, + "step": 2270 + }, + { + "epoch": 0.3, + "eval_logits/chosen": -2.5852274894714355, + "eval_logits/rejected": -2.6019883155822754, + "eval_logps/chosen": -421.2300109863281, + "eval_logps/rejected": -415.59075927734375, + "eval_loss": 0.6269444823265076, + "eval_rewards/accuracies": 0.6455000042915344, + "eval_rewards/chosen": -0.8848059773445129, + "eval_rewards/margins": 0.2600012421607971, + "eval_rewards/rejected": -1.14480721950531, + "eval_runtime": 196.8549, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 2270 + }, + { + "epoch": 0.3, + "learning_rate": 4.424622026991536e-06, + "logits/chosen": -2.7388529777526855, + "logits/rejected": -2.7334494590759277, + "logps/chosen": -420.9461975097656, + "logps/rejected": -408.1824951171875, + "loss": 0.6301, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.8830841183662415, + "rewards/margins": 0.2399568259716034, + "rewards/rejected": -1.1230409145355225, + "step": 2280 + }, + { + "epoch": 0.3, + "eval_logits/chosen": -2.5922598838806152, + "eval_logits/rejected": -2.60862398147583, + "eval_logps/chosen": -421.9366760253906, + "eval_logps/rejected": -416.2523193359375, + "eval_loss": 0.6262630224227905, + "eval_rewards/accuracies": 0.6474999785423279, + "eval_rewards/chosen": -0.891872763633728, + "eval_rewards/margins": 0.2595498561859131, + "eval_rewards/rejected": -1.1514227390289307, + "eval_runtime": 196.8689, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.08, + "step": 2280 + }, + { + "epoch": 0.3, + "learning_rate": 4.417311944255215e-06, + "logits/chosen": -2.8399720191955566, + "logits/rejected": -2.8531653881073, + "logps/chosen": -379.5211181640625, + "logps/rejected": -424.2601623535156, + "loss": 0.6861, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.863876461982727, + "rewards/margins": 0.12213647365570068, + "rewards/rejected": -0.9860130548477173, + "step": 2290 + }, + { + "epoch": 0.3, + "eval_logits/chosen": -2.590639352798462, + "eval_logits/rejected": -2.6069109439849854, + "eval_logps/chosen": -423.515625, + "eval_logps/rejected": -418.02130126953125, + "eval_loss": 0.6258890628814697, + "eval_rewards/accuracies": 0.6495000123977661, + "eval_rewards/chosen": -0.9076623320579529, + "eval_rewards/margins": 0.26145049929618835, + "eval_rewards/rejected": -1.1691128015518188, + "eval_runtime": 197.0851, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 2290 + }, + { + "epoch": 0.3, + "learning_rate": 4.409961837561122e-06, + "logits/chosen": -2.789848804473877, + "logits/rejected": -2.7216029167175293, + "logps/chosen": -463.98345947265625, + "logps/rejected": -494.39251708984375, + "loss": 0.6088, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.9309770464897156, + "rewards/margins": 0.31190377473831177, + "rewards/rejected": -1.2428807020187378, + "step": 2300 + }, + { + "epoch": 0.3, + "eval_logits/chosen": -2.5774741172790527, + "eval_logits/rejected": -2.5942113399505615, + "eval_logps/chosen": -424.3768005371094, + "eval_logps/rejected": -419.164306640625, + "eval_loss": 0.6265602707862854, + "eval_rewards/accuracies": 0.6460000276565552, + "eval_rewards/chosen": -0.9162741899490356, + "eval_rewards/margins": 0.2642686367034912, + "eval_rewards/rejected": -1.1805428266525269, + "eval_runtime": 196.9992, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 2300 + }, + { + "epoch": 0.3, + "learning_rate": 4.402571860343006e-06, + "logits/chosen": -2.7374491691589355, + "logits/rejected": -2.7134087085723877, + "logps/chosen": -429.6944885253906, + "logps/rejected": -379.41595458984375, + "loss": 0.6074, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.7847088575363159, + "rewards/margins": 0.2951991558074951, + "rewards/rejected": -1.0799081325531006, + "step": 2310 + }, + { + "epoch": 0.3, + "eval_logits/chosen": -2.5742263793945312, + "eval_logits/rejected": -2.591237783432007, + "eval_logps/chosen": -419.9903259277344, + "eval_logps/rejected": -414.24224853515625, + "eval_loss": 0.6268322467803955, + "eval_rewards/accuracies": 0.6399999856948853, + "eval_rewards/chosen": -0.8724092841148376, + "eval_rewards/margins": 0.2589130699634552, + "eval_rewards/rejected": -1.1313222646713257, + "eval_runtime": 196.9684, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 2310 + }, + { + "epoch": 0.3, + "learning_rate": 4.3951421668669165e-06, + "logits/chosen": -2.7886240482330322, + "logits/rejected": -2.7811214923858643, + "logps/chosen": -431.88958740234375, + "logps/rejected": -444.73175048828125, + "loss": 0.5575, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.8605194091796875, + "rewards/margins": 0.41099271178245544, + "rewards/rejected": -1.2715120315551758, + "step": 2320 + }, + { + "epoch": 0.3, + "eval_logits/chosen": -2.567586660385132, + "eval_logits/rejected": -2.5854969024658203, + "eval_logps/chosen": -426.3576354980469, + "eval_logps/rejected": -421.8108215332031, + "eval_loss": 0.6284373998641968, + "eval_rewards/accuracies": 0.6434999704360962, + "eval_rewards/chosen": -0.9360825419425964, + "eval_rewards/margins": 0.27092528343200684, + "eval_rewards/rejected": -1.2070077657699585, + "eval_runtime": 196.778, + "eval_samples_per_second": 10.164, + "eval_steps_per_second": 5.082, + "step": 2320 + }, + { + "epoch": 0.3, + "learning_rate": 4.3876729122279784e-06, + "logits/chosen": -2.776318073272705, + "logits/rejected": -2.809359312057495, + "logps/chosen": -338.4325256347656, + "logps/rejected": -368.7469177246094, + "loss": 0.5775, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.8960781097412109, + "rewards/margins": 0.3791848123073578, + "rewards/rejected": -1.2752629518508911, + "step": 2330 + }, + { + "epoch": 0.3, + "eval_logits/chosen": -2.5665230751037598, + "eval_logits/rejected": -2.584770679473877, + "eval_logps/chosen": -433.9892578125, + "eval_logps/rejected": -430.6888122558594, + "eval_loss": 0.6298844814300537, + "eval_rewards/accuracies": 0.6420000195503235, + "eval_rewards/chosen": -1.01239812374115, + "eval_rewards/margins": 0.28338971734046936, + "eval_rewards/rejected": -1.2957879304885864, + "eval_runtime": 196.7239, + "eval_samples_per_second": 10.167, + "eval_steps_per_second": 5.083, + "step": 2330 + }, + { + "epoch": 0.31, + "learning_rate": 4.3801642523471585e-06, + "logits/chosen": -2.8114333152770996, + "logits/rejected": -2.7745885848999023, + "logps/chosen": -434.7900390625, + "logps/rejected": -414.8701171875, + "loss": 0.5663, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.9342101812362671, + "rewards/margins": 0.3892834484577179, + "rewards/rejected": -1.3234935998916626, + "step": 2340 + }, + { + "epoch": 0.31, + "eval_logits/chosen": -2.5688867568969727, + "eval_logits/rejected": -2.5871498584747314, + "eval_logps/chosen": -436.9039306640625, + "eval_logps/rejected": -434.2275695800781, + "eval_loss": 0.6300011277198792, + "eval_rewards/accuracies": 0.6414999961853027, + "eval_rewards/chosen": -1.041544795036316, + "eval_rewards/margins": 0.2896304726600647, + "eval_rewards/rejected": -1.3311753273010254, + "eval_runtime": 196.9366, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 2340 + }, + { + "epoch": 0.31, + "learning_rate": 4.37261634396801e-06, + "logits/chosen": -2.6677405834198, + "logits/rejected": -2.6676297187805176, + "logps/chosen": -426.80712890625, + "logps/rejected": -432.39813232421875, + "loss": 0.5976, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.0480899810791016, + "rewards/margins": 0.3257550299167633, + "rewards/rejected": -1.3738449811935425, + "step": 2350 + }, + { + "epoch": 0.31, + "eval_logits/chosen": -2.5646708011627197, + "eval_logits/rejected": -2.582854747772217, + "eval_logps/chosen": -435.26544189453125, + "eval_logps/rejected": -432.55438232421875, + "eval_loss": 0.6303899884223938, + "eval_rewards/accuracies": 0.6395000219345093, + "eval_rewards/chosen": -1.025160312652588, + "eval_rewards/margins": 0.28928351402282715, + "eval_rewards/rejected": -1.3144437074661255, + "eval_runtime": 196.9692, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 2350 + }, + { + "epoch": 0.31, + "learning_rate": 4.365029344653401e-06, + "logits/chosen": -2.7826085090637207, + "logits/rejected": -2.7922708988189697, + "logps/chosen": -518.70361328125, + "logps/rejected": -454.2701721191406, + "loss": 0.6032, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.0072052478790283, + "rewards/margins": 0.377673864364624, + "rewards/rejected": -1.3848788738250732, + "step": 2360 + }, + { + "epoch": 0.31, + "eval_logits/chosen": -2.564948558807373, + "eval_logits/rejected": -2.582928419113159, + "eval_logps/chosen": -434.41937255859375, + "eval_logps/rejected": -431.6936950683594, + "eval_loss": 0.6297749876976013, + "eval_rewards/accuracies": 0.6395000219345093, + "eval_rewards/chosen": -1.0167001485824585, + "eval_rewards/margins": 0.28913629055023193, + "eval_rewards/rejected": -1.3058364391326904, + "eval_runtime": 196.9346, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 2360 + }, + { + "epoch": 0.31, + "learning_rate": 4.35740341278222e-06, + "logits/chosen": -2.785799264907837, + "logits/rejected": -2.825850009918213, + "logps/chosen": -504.35968017578125, + "logps/rejected": -487.5384216308594, + "loss": 0.6569, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.952163577079773, + "rewards/margins": 0.20112566649913788, + "rewards/rejected": -1.1532893180847168, + "step": 2370 + }, + { + "epoch": 0.31, + "eval_logits/chosen": -2.565399169921875, + "eval_logits/rejected": -2.583078384399414, + "eval_logps/chosen": -431.9676208496094, + "eval_logps/rejected": -428.8084716796875, + "eval_loss": 0.6288526654243469, + "eval_rewards/accuracies": 0.640999972820282, + "eval_rewards/chosen": -0.992182195186615, + "eval_rewards/margins": 0.2848021686077118, + "eval_rewards/rejected": -1.2769843339920044, + "eval_runtime": 197.1699, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 2370 + }, + { + "epoch": 0.31, + "learning_rate": 4.349738707546079e-06, + "logits/chosen": -2.6712303161621094, + "logits/rejected": -2.681317090988159, + "logps/chosen": -432.171630859375, + "logps/rejected": -398.49884033203125, + "loss": 0.6557, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.9553629755973816, + "rewards/margins": 0.19942878186702728, + "rewards/rejected": -1.1547917127609253, + "step": 2380 + }, + { + "epoch": 0.31, + "eval_logits/chosen": -2.5676705837249756, + "eval_logits/rejected": -2.5848608016967773, + "eval_logps/chosen": -434.3564453125, + "eval_logps/rejected": -431.3999328613281, + "eval_loss": 0.6287895441055298, + "eval_rewards/accuracies": 0.640999972820282, + "eval_rewards/chosen": -1.0160703659057617, + "eval_rewards/margins": 0.28682854771614075, + "eval_rewards/rejected": -1.3028990030288696, + "eval_runtime": 196.8905, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 2380 + }, + { + "epoch": 0.31, + "learning_rate": 4.3420353889459835e-06, + "logits/chosen": -2.835454225540161, + "logits/rejected": -2.818660259246826, + "logps/chosen": -486.8052673339844, + "logps/rejected": -451.1253967285156, + "loss": 0.593, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.9559980630874634, + "rewards/margins": 0.37571167945861816, + "rewards/rejected": -1.331709623336792, + "step": 2390 + }, + { + "epoch": 0.31, + "eval_logits/chosen": -2.5628139972686768, + "eval_logits/rejected": -2.579824686050415, + "eval_logps/chosen": -436.363525390625, + "eval_logps/rejected": -433.6669921875, + "eval_loss": 0.62941575050354, + "eval_rewards/accuracies": 0.6414999961853027, + "eval_rewards/chosen": -1.0361416339874268, + "eval_rewards/margins": 0.2894286513328552, + "eval_rewards/rejected": -1.3255702257156372, + "eval_runtime": 197.1764, + "eval_samples_per_second": 10.143, + "eval_steps_per_second": 5.072, + "step": 2390 + }, + { + "epoch": 0.31, + "learning_rate": 4.334293617788992e-06, + "logits/chosen": -2.8445041179656982, + "logits/rejected": -2.79730486869812, + "logps/chosen": -416.4369201660156, + "logps/rejected": -369.0491638183594, + "loss": 0.5738, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.0192604064941406, + "rewards/margins": 0.47983551025390625, + "rewards/rejected": -1.4990959167480469, + "step": 2400 + }, + { + "epoch": 0.31, + "eval_logits/chosen": -2.565260171890259, + "eval_logits/rejected": -2.5818259716033936, + "eval_logps/chosen": -431.8421325683594, + "eval_logps/rejected": -428.3226623535156, + "eval_loss": 0.628280520439148, + "eval_rewards/accuracies": 0.6395000219345093, + "eval_rewards/chosen": -0.9909270405769348, + "eval_rewards/margins": 0.2811991274356842, + "eval_rewards/rejected": -1.272126317024231, + "eval_runtime": 196.9197, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 2400 + }, + { + "epoch": 0.32, + "learning_rate": 4.326513555684867e-06, + "logits/chosen": -2.804062843322754, + "logits/rejected": -2.7835028171539307, + "logps/chosen": -459.07330322265625, + "logps/rejected": -400.3332214355469, + "loss": 0.6016, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.8659582138061523, + "rewards/margins": 0.28941652178764343, + "rewards/rejected": -1.1553747653961182, + "step": 2410 + }, + { + "epoch": 0.32, + "eval_logits/chosen": -2.569629430770874, + "eval_logits/rejected": -2.5860977172851562, + "eval_logps/chosen": -427.0762634277344, + "eval_logps/rejected": -422.9002380371094, + "eval_loss": 0.6277644038200378, + "eval_rewards/accuracies": 0.6380000114440918, + "eval_rewards/chosen": -0.9432685375213623, + "eval_rewards/margins": 0.27463406324386597, + "eval_rewards/rejected": -1.2179025411605835, + "eval_runtime": 196.9404, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.078, + "step": 2410 + }, + { + "epoch": 0.32, + "learning_rate": 4.31869536504269e-06, + "logits/chosen": -2.7398853302001953, + "logits/rejected": -2.775299310684204, + "logps/chosen": -407.65118408203125, + "logps/rejected": -421.64093017578125, + "loss": 0.5889, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.9197827577590942, + "rewards/margins": 0.32822954654693604, + "rewards/rejected": -1.2480123043060303, + "step": 2420 + }, + { + "epoch": 0.32, + "eval_logits/chosen": -2.556795835494995, + "eval_logits/rejected": -2.5738165378570557, + "eval_logps/chosen": -426.9832458496094, + "eval_logps/rejected": -422.9380187988281, + "eval_loss": 0.6303883194923401, + "eval_rewards/accuracies": 0.6345000267028809, + "eval_rewards/chosen": -0.9423384070396423, + "eval_rewards/margins": 0.27594175934791565, + "eval_rewards/rejected": -1.2182801961898804, + "eval_runtime": 196.9091, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.078, + "step": 2420 + }, + { + "epoch": 0.32, + "learning_rate": 4.310839209067482e-06, + "logits/chosen": -2.842728853225708, + "logits/rejected": -2.793224334716797, + "logps/chosen": -423.9481506347656, + "logps/rejected": -408.9284362792969, + "loss": 0.6591, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.9646501541137695, + "rewards/margins": 0.16199491918087006, + "rewards/rejected": -1.1266452074050903, + "step": 2430 + }, + { + "epoch": 0.32, + "eval_logits/chosen": -2.5522592067718506, + "eval_logits/rejected": -2.5687339305877686, + "eval_logps/chosen": -426.2090759277344, + "eval_logps/rejected": -422.0822448730469, + "eval_loss": 0.6298808455467224, + "eval_rewards/accuracies": 0.6324999928474426, + "eval_rewards/chosen": -0.9345968961715698, + "eval_rewards/margins": 0.2751254737377167, + "eval_rewards/rejected": -1.2097221612930298, + "eval_runtime": 197.0417, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 2430 + }, + { + "epoch": 0.32, + "learning_rate": 4.302945251756788e-06, + "logits/chosen": -2.7333877086639404, + "logits/rejected": -2.7457363605499268, + "logps/chosen": -420.790283203125, + "logps/rejected": -406.7698669433594, + "loss": 0.5945, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.9372466802597046, + "rewards/margins": 0.35772770643234253, + "rewards/rejected": -1.294974446296692, + "step": 2440 + }, + { + "epoch": 0.32, + "eval_logits/chosen": -2.5549113750457764, + "eval_logits/rejected": -2.571284294128418, + "eval_logps/chosen": -422.33050537109375, + "eval_logps/rejected": -417.47723388671875, + "eval_loss": 0.6298490166664124, + "eval_rewards/accuracies": 0.6349999904632568, + "eval_rewards/chosen": -0.8958110213279724, + "eval_rewards/margins": 0.26786088943481445, + "eval_rewards/rejected": -1.1636719703674316, + "eval_runtime": 197.0623, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 2440 + }, + { + "epoch": 0.32, + "learning_rate": 4.29501365789726e-06, + "logits/chosen": -2.744837522506714, + "logits/rejected": -2.7025675773620605, + "logps/chosen": -375.1982727050781, + "logps/rejected": -369.05279541015625, + "loss": 0.6374, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.9093378186225891, + "rewards/margins": 0.3083241879940033, + "rewards/rejected": -1.2176620960235596, + "step": 2450 + }, + { + "epoch": 0.32, + "eval_logits/chosen": -2.5562515258789062, + "eval_logits/rejected": -2.572701930999756, + "eval_logps/chosen": -420.1925964355469, + "eval_logps/rejected": -415.2203063964844, + "eval_loss": 0.6289076805114746, + "eval_rewards/accuracies": 0.6365000009536743, + "eval_rewards/chosen": -0.8744320273399353, + "eval_rewards/margins": 0.26667073369026184, + "eval_rewards/rejected": -1.1411027908325195, + "eval_runtime": 196.7816, + "eval_samples_per_second": 10.164, + "eval_steps_per_second": 5.082, + "step": 2450 + }, + { + "epoch": 0.32, + "learning_rate": 4.2870445930612135e-06, + "logits/chosen": -2.7384285926818848, + "logits/rejected": -2.7128889560699463, + "logps/chosen": -462.66632080078125, + "logps/rejected": -456.52777099609375, + "loss": 0.5373, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.7665729522705078, + "rewards/margins": 0.485451877117157, + "rewards/rejected": -1.2520248889923096, + "step": 2460 + }, + { + "epoch": 0.32, + "eval_logits/chosen": -2.5484607219696045, + "eval_logits/rejected": -2.56520676612854, + "eval_logps/chosen": -422.5688781738281, + "eval_logps/rejected": -417.9743347167969, + "eval_loss": 0.6304011344909668, + "eval_rewards/accuracies": 0.6324999928474426, + "eval_rewards/chosen": -0.8981947302818298, + "eval_rewards/margins": 0.2704484164714813, + "eval_rewards/rejected": -1.1686433553695679, + "eval_runtime": 196.9165, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.078, + "step": 2460 + }, + { + "epoch": 0.32, + "learning_rate": 4.279038223603171e-06, + "logits/chosen": -2.7502496242523193, + "logits/rejected": -2.770395278930664, + "logps/chosen": -421.0673828125, + "logps/rejected": -402.5542907714844, + "loss": 0.5907, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.8335832357406616, + "rewards/margins": 0.3773055672645569, + "rewards/rejected": -1.2108887434005737, + "step": 2470 + }, + { + "epoch": 0.32, + "eval_logits/chosen": -2.532897472381592, + "eval_logits/rejected": -2.5502543449401855, + "eval_logps/chosen": -432.739013671875, + "eval_logps/rejected": -429.7646179199219, + "eval_loss": 0.632610559463501, + "eval_rewards/accuracies": 0.6309999823570251, + "eval_rewards/chosen": -0.9998957514762878, + "eval_rewards/margins": 0.28665024042129517, + "eval_rewards/rejected": -1.2865458726882935, + "eval_runtime": 196.9374, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 2470 + }, + { + "epoch": 0.32, + "learning_rate": 4.2709947166563906e-06, + "logits/chosen": -2.638233184814453, + "logits/rejected": -2.607182025909424, + "logps/chosen": -435.4264221191406, + "logps/rejected": -462.9147033691406, + "loss": 0.611, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.0771570205688477, + "rewards/margins": 0.339433491230011, + "rewards/rejected": -1.4165904521942139, + "step": 2480 + }, + { + "epoch": 0.32, + "eval_logits/chosen": -2.5232503414154053, + "eval_logits/rejected": -2.540679693222046, + "eval_logps/chosen": -433.52130126953125, + "eval_logps/rejected": -430.7619323730469, + "eval_loss": 0.6332414746284485, + "eval_rewards/accuracies": 0.6340000033378601, + "eval_rewards/chosen": -1.0077186822891235, + "eval_rewards/margins": 0.2888000011444092, + "eval_rewards/rejected": -1.2965186834335327, + "eval_runtime": 196.9153, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.078, + "step": 2480 + }, + { + "epoch": 0.33, + "learning_rate": 4.262914240129379e-06, + "logits/chosen": -2.7348380088806152, + "logits/rejected": -2.7146236896514893, + "logps/chosen": -457.7591247558594, + "logps/rejected": -439.1285705566406, + "loss": 0.6033, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.9677481651306152, + "rewards/margins": 0.43764448165893555, + "rewards/rejected": -1.4053925275802612, + "step": 2490 + }, + { + "epoch": 0.33, + "eval_logits/chosen": -2.5245630741119385, + "eval_logits/rejected": -2.5417044162750244, + "eval_logps/chosen": -431.026123046875, + "eval_logps/rejected": -427.7772216796875, + "eval_loss": 0.6321043968200684, + "eval_rewards/accuracies": 0.6365000009536743, + "eval_rewards/chosen": -0.9827673435211182, + "eval_rewards/margins": 0.28390470147132874, + "eval_rewards/rejected": -1.266672134399414, + "eval_runtime": 196.7971, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 5.081, + "step": 2490 + }, + { + "epoch": 0.33, + "learning_rate": 4.254796962702382e-06, + "logits/chosen": -2.7546756267547607, + "logits/rejected": -2.7376418113708496, + "logps/chosen": -446.4517517089844, + "logps/rejected": -444.3236389160156, + "loss": 0.6122, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.8910658955574036, + "rewards/margins": 0.300523579120636, + "rewards/rejected": -1.191589593887329, + "step": 2500 + }, + { + "epoch": 0.33, + "eval_logits/chosen": -2.5351521968841553, + "eval_logits/rejected": -2.551602602005005, + "eval_logps/chosen": -429.4399108886719, + "eval_logps/rejected": -425.7723693847656, + "eval_loss": 0.6307942867279053, + "eval_rewards/accuracies": 0.6365000009536743, + "eval_rewards/chosen": -0.966904878616333, + "eval_rewards/margins": 0.27971866726875305, + "eval_rewards/rejected": -1.2466236352920532, + "eval_runtime": 197.2545, + "eval_samples_per_second": 10.139, + "eval_steps_per_second": 5.07, + "step": 2500 + }, + { + "epoch": 0.33, + "learning_rate": 4.246643053823864e-06, + "logits/chosen": -2.7471210956573486, + "logits/rejected": -2.7411389350891113, + "logps/chosen": -359.96807861328125, + "logps/rejected": -394.1663818359375, + "loss": 0.6077, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.8434604406356812, + "rewards/margins": 0.3051786720752716, + "rewards/rejected": -1.1486390829086304, + "step": 2510 + }, + { + "epoch": 0.33, + "eval_logits/chosen": -2.548297882080078, + "eval_logits/rejected": -2.5639851093292236, + "eval_logps/chosen": -429.1539306640625, + "eval_logps/rejected": -425.4289855957031, + "eval_loss": 0.628643810749054, + "eval_rewards/accuracies": 0.640500009059906, + "eval_rewards/chosen": -0.9640450477600098, + "eval_rewards/margins": 0.27914461493492126, + "eval_rewards/rejected": -1.2431896924972534, + "eval_runtime": 197.0617, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 2510 + }, + { + "epoch": 0.33, + "learning_rate": 4.238452683706979e-06, + "logits/chosen": -2.7691006660461426, + "logits/rejected": -2.7818400859832764, + "logps/chosen": -388.3199462890625, + "logps/rejected": -355.18719482421875, + "loss": 0.6255, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.9922312498092651, + "rewards/margins": 0.24085617065429688, + "rewards/rejected": -1.2330873012542725, + "step": 2520 + }, + { + "epoch": 0.33, + "eval_logits/chosen": -2.5428717136383057, + "eval_logits/rejected": -2.5581729412078857, + "eval_logps/chosen": -435.9595947265625, + "eval_logps/rejected": -433.4538269042969, + "eval_loss": 0.6296377182006836, + "eval_rewards/accuracies": 0.6389999985694885, + "eval_rewards/chosen": -1.032102108001709, + "eval_rewards/margins": 0.2913359999656677, + "eval_rewards/rejected": -1.3234381675720215, + "eval_runtime": 197.3217, + "eval_samples_per_second": 10.136, + "eval_steps_per_second": 5.068, + "step": 2520 + }, + { + "epoch": 0.33, + "learning_rate": 4.2302260233260025e-06, + "logits/chosen": -2.712089776992798, + "logits/rejected": -2.762547731399536, + "logps/chosen": -442.83929443359375, + "logps/rejected": -461.90924072265625, + "loss": 0.6172, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.0242395401000977, + "rewards/margins": 0.3576509356498718, + "rewards/rejected": -1.3818905353546143, + "step": 2530 + }, + { + "epoch": 0.33, + "eval_logits/chosen": -2.53926420211792, + "eval_logits/rejected": -2.5550448894500732, + "eval_logps/chosen": -437.32904052734375, + "eval_logps/rejected": -435.19122314453125, + "eval_loss": 0.63003009557724, + "eval_rewards/accuracies": 0.640999972820282, + "eval_rewards/chosen": -1.0457963943481445, + "eval_rewards/margins": 0.29501575231552124, + "eval_rewards/rejected": -1.340812087059021, + "eval_runtime": 197.2545, + "eval_samples_per_second": 10.139, + "eval_steps_per_second": 5.07, + "step": 2530 + }, + { + "epoch": 0.33, + "learning_rate": 4.2219632444127766e-06, + "logits/chosen": -2.6461236476898193, + "logits/rejected": -2.662266969680786, + "logps/chosen": -439.08544921875, + "logps/rejected": -439.2572326660156, + "loss": 0.6536, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.9915159344673157, + "rewards/margins": 0.23148474097251892, + "rewards/rejected": -1.2230005264282227, + "step": 2540 + }, + { + "epoch": 0.33, + "eval_logits/chosen": -2.5464367866516113, + "eval_logits/rejected": -2.5626463890075684, + "eval_logps/chosen": -430.1457824707031, + "eval_logps/rejected": -426.9624938964844, + "eval_loss": 0.6277977824211121, + "eval_rewards/accuracies": 0.6434999704360962, + "eval_rewards/chosen": -0.9739632606506348, + "eval_rewards/margins": 0.2845614552497864, + "eval_rewards/rejected": -1.2585248947143555, + "eval_runtime": 196.8842, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 2540 + }, + { + "epoch": 0.33, + "learning_rate": 4.213664519453115e-06, + "logits/chosen": -2.822821617126465, + "logits/rejected": -2.768632650375366, + "logps/chosen": -404.8807373046875, + "logps/rejected": -409.8775939941406, + "loss": 0.6565, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -1.0015974044799805, + "rewards/margins": 0.19808810949325562, + "rewards/rejected": -1.1996854543685913, + "step": 2550 + }, + { + "epoch": 0.33, + "eval_logits/chosen": -2.560555934906006, + "eval_logits/rejected": -2.576713800430298, + "eval_logps/chosen": -421.6891174316406, + "eval_logps/rejected": -416.94012451171875, + "eval_loss": 0.6262774467468262, + "eval_rewards/accuracies": 0.6399999856948853, + "eval_rewards/chosen": -0.8893969655036926, + "eval_rewards/margins": 0.26890408992767334, + "eval_rewards/rejected": -1.1583009958267212, + "eval_runtime": 196.7892, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 5.082, + "step": 2550 + }, + { + "epoch": 0.33, + "learning_rate": 4.205330021683208e-06, + "logits/chosen": -2.661653995513916, + "logits/rejected": -2.6717755794525146, + "logps/chosen": -348.31427001953125, + "logps/rejected": -350.11859130859375, + "loss": 0.6676, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.7914212346076965, + "rewards/margins": 0.11484186351299286, + "rewards/rejected": -0.9062630534172058, + "step": 2560 + }, + { + "epoch": 0.33, + "eval_logits/chosen": -2.56754207611084, + "eval_logits/rejected": -2.583657741546631, + "eval_logps/chosen": -412.23150634765625, + "eval_logps/rejected": -405.8481750488281, + "eval_loss": 0.6265344023704529, + "eval_rewards/accuracies": 0.6439999938011169, + "eval_rewards/chosen": -0.7948205471038818, + "eval_rewards/margins": 0.25256121158599854, + "eval_rewards/rejected": -1.04738187789917, + "eval_runtime": 196.8811, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 2560 + }, + { + "epoch": 0.34, + "learning_rate": 4.196959925086008e-06, + "logits/chosen": -2.756273031234741, + "logits/rejected": -2.7312004566192627, + "logps/chosen": -399.8543701171875, + "logps/rejected": -426.33099365234375, + "loss": 0.6483, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.7400082349777222, + "rewards/margins": 0.16529114544391632, + "rewards/rejected": -0.9052993655204773, + "step": 2570 + }, + { + "epoch": 0.34, + "eval_logits/chosen": -2.579993724822998, + "eval_logits/rejected": -2.595771074295044, + "eval_logps/chosen": -402.87548828125, + "eval_logps/rejected": -394.6590576171875, + "eval_loss": 0.627536952495575, + "eval_rewards/accuracies": 0.6489999890327454, + "eval_rewards/chosen": -0.701261043548584, + "eval_rewards/margins": 0.23422937095165253, + "eval_rewards/rejected": -0.9354904890060425, + "eval_runtime": 197.3793, + "eval_samples_per_second": 10.133, + "eval_steps_per_second": 5.066, + "step": 2570 + }, + { + "epoch": 0.34, + "learning_rate": 4.188554404387588e-06, + "logits/chosen": -2.831542730331421, + "logits/rejected": -2.8460183143615723, + "logps/chosen": -430.8309020996094, + "logps/rejected": -411.8692321777344, + "loss": 0.6541, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.7262514233589172, + "rewards/margins": 0.167301207780838, + "rewards/rejected": -0.8935526609420776, + "step": 2580 + }, + { + "epoch": 0.34, + "eval_logits/chosen": -2.590602159500122, + "eval_logits/rejected": -2.606855630874634, + "eval_logps/chosen": -401.1507873535156, + "eval_logps/rejected": -392.6910095214844, + "eval_loss": 0.6273356676101685, + "eval_rewards/accuracies": 0.6495000123977661, + "eval_rewards/chosen": -0.6840137839317322, + "eval_rewards/margins": 0.23179614543914795, + "eval_rewards/rejected": -0.9158099293708801, + "eval_runtime": 196.9199, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 2580 + }, + { + "epoch": 0.34, + "learning_rate": 4.180113635053504e-06, + "logits/chosen": -2.8526382446289062, + "logits/rejected": -2.837333917617798, + "logps/chosen": -375.9033508300781, + "logps/rejected": -425.47607421875, + "loss": 0.6052, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.7050365805625916, + "rewards/margins": 0.3069326877593994, + "rewards/rejected": -1.0119692087173462, + "step": 2590 + }, + { + "epoch": 0.34, + "eval_logits/chosen": -2.5913565158843994, + "eval_logits/rejected": -2.6081368923187256, + "eval_logps/chosen": -404.6280822753906, + "eval_logps/rejected": -396.76959228515625, + "eval_loss": 0.6276716589927673, + "eval_rewards/accuracies": 0.6504999995231628, + "eval_rewards/chosen": -0.7187868356704712, + "eval_rewards/margins": 0.23780903220176697, + "eval_rewards/rejected": -0.9565958976745605, + "eval_runtime": 196.9018, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 2590 + }, + { + "epoch": 0.34, + "learning_rate": 4.17163779328513e-06, + "logits/chosen": -2.7927684783935547, + "logits/rejected": -2.7561044692993164, + "logps/chosen": -401.1669921875, + "logps/rejected": -393.67791748046875, + "loss": 0.6153, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.6651914715766907, + "rewards/margins": 0.30985796451568604, + "rewards/rejected": -0.9750493764877319, + "step": 2600 + }, + { + "epoch": 0.34, + "eval_logits/chosen": -2.5893898010253906, + "eval_logits/rejected": -2.6071102619171143, + "eval_logps/chosen": -411.3906555175781, + "eval_logps/rejected": -404.7594909667969, + "eval_loss": 0.6282112002372742, + "eval_rewards/accuracies": 0.6395000219345093, + "eval_rewards/chosen": -0.7864127159118652, + "eval_rewards/margins": 0.2500820457935333, + "eval_rewards/rejected": -1.0364947319030762, + "eval_runtime": 197.2993, + "eval_samples_per_second": 10.137, + "eval_steps_per_second": 5.068, + "step": 2600 + }, + { + "epoch": 0.34, + "learning_rate": 4.163127056015975e-06, + "logits/chosen": -2.7800028324127197, + "logits/rejected": -2.7452735900878906, + "logps/chosen": -428.4466857910156, + "logps/rejected": -435.05194091796875, + "loss": 0.616, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.7805007100105286, + "rewards/margins": 0.31978195905685425, + "rewards/rejected": -1.1002826690673828, + "step": 2610 + }, + { + "epoch": 0.34, + "eval_logits/chosen": -2.590467691421509, + "eval_logits/rejected": -2.608050584793091, + "eval_logps/chosen": -417.75006103515625, + "eval_logps/rejected": -411.94488525390625, + "eval_loss": 0.6286919116973877, + "eval_rewards/accuracies": 0.6380000114440918, + "eval_rewards/chosen": -0.8500065207481384, + "eval_rewards/margins": 0.258341908454895, + "eval_rewards/rejected": -1.1083483695983887, + "eval_runtime": 196.9134, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.078, + "step": 2610 + }, + { + "epoch": 0.34, + "learning_rate": 4.154581600907994e-06, + "logits/chosen": -2.7846765518188477, + "logits/rejected": -2.7442469596862793, + "logps/chosen": -391.93023681640625, + "logps/rejected": -392.11932373046875, + "loss": 0.5428, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7660864591598511, + "rewards/margins": 0.4419211447238922, + "rewards/rejected": -1.208007574081421, + "step": 2620 + }, + { + "epoch": 0.34, + "eval_logits/chosen": -2.6004793643951416, + "eval_logits/rejected": -2.6181156635284424, + "eval_logps/chosen": -427.6159362792969, + "eval_logps/rejected": -423.33331298828125, + "eval_loss": 0.6286585927009583, + "eval_rewards/accuracies": 0.6365000009536743, + "eval_rewards/chosen": -0.9486655592918396, + "eval_rewards/margins": 0.2735675275325775, + "eval_rewards/rejected": -1.2222331762313843, + "eval_runtime": 197.0103, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 2620 + }, + { + "epoch": 0.34, + "learning_rate": 4.14600160634788e-06, + "logits/chosen": -2.7774970531463623, + "logits/rejected": -2.7458691596984863, + "logps/chosen": -388.33575439453125, + "logps/rejected": -434.8145446777344, + "loss": 0.5982, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.9897419214248657, + "rewards/margins": 0.3823489546775818, + "rewards/rejected": -1.3720909357070923, + "step": 2630 + }, + { + "epoch": 0.34, + "eval_logits/chosen": -2.5988712310791016, + "eval_logits/rejected": -2.61651873588562, + "eval_logps/chosen": -435.7413024902344, + "eval_logps/rejected": -432.87921142578125, + "eval_loss": 0.630751371383667, + "eval_rewards/accuracies": 0.6384999752044678, + "eval_rewards/chosen": -1.029918909072876, + "eval_rewards/margins": 0.2877727448940277, + "eval_rewards/rejected": -1.3176918029785156, + "eval_runtime": 197.0253, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 2630 + }, + { + "epoch": 0.35, + "learning_rate": 4.137387251443335e-06, + "logits/chosen": -2.788888931274414, + "logits/rejected": -2.7759616374969482, + "logps/chosen": -409.748291015625, + "logps/rejected": -384.11199951171875, + "loss": 0.609, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.9789314270019531, + "rewards/margins": 0.31255120038986206, + "rewards/rejected": -1.29148268699646, + "step": 2640 + }, + { + "epoch": 0.35, + "eval_logits/chosen": -2.6059696674346924, + "eval_logits/rejected": -2.623617649078369, + "eval_logps/chosen": -432.4779357910156, + "eval_logps/rejected": -429.3039245605469, + "eval_loss": 0.629350483417511, + "eval_rewards/accuracies": 0.6359999775886536, + "eval_rewards/chosen": -0.9972848892211914, + "eval_rewards/margins": 0.2846539616584778, + "eval_rewards/rejected": -1.281938910484314, + "eval_runtime": 196.9399, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.078, + "step": 2640 + }, + { + "epoch": 0.35, + "learning_rate": 4.128738716019338e-06, + "logits/chosen": -2.7614262104034424, + "logits/rejected": -2.7496438026428223, + "logps/chosen": -448.43798828125, + "logps/rejected": -452.8334045410156, + "loss": 0.5885, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.8865548968315125, + "rewards/margins": 0.39715567231178284, + "rewards/rejected": -1.2837104797363281, + "step": 2650 + }, + { + "epoch": 0.35, + "eval_logits/chosen": -2.608200788497925, + "eval_logits/rejected": -2.6260952949523926, + "eval_logps/chosen": -431.5092468261719, + "eval_logps/rejected": -428.17840576171875, + "eval_loss": 0.6292994618415833, + "eval_rewards/accuracies": 0.6349999904632568, + "eval_rewards/chosen": -0.987598717212677, + "eval_rewards/margins": 0.28308507800102234, + "eval_rewards/rejected": -1.2706836462020874, + "eval_runtime": 196.8459, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 2650 + }, + { + "epoch": 0.35, + "learning_rate": 4.120056180614386e-06, + "logits/chosen": -2.6786999702453613, + "logits/rejected": -2.662436008453369, + "logps/chosen": -402.8612365722656, + "logps/rejected": -436.7867126464844, + "loss": 0.6198, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.0017694234848022, + "rewards/margins": 0.2881324291229248, + "rewards/rejected": -1.2899019718170166, + "step": 2660 + }, + { + "epoch": 0.35, + "eval_logits/chosen": -2.5987579822540283, + "eval_logits/rejected": -2.618000030517578, + "eval_logps/chosen": -432.24072265625, + "eval_logps/rejected": -429.0343933105469, + "eval_loss": 0.630684494972229, + "eval_rewards/accuracies": 0.6355000138282776, + "eval_rewards/chosen": -0.9949126243591309, + "eval_rewards/margins": 0.28433096408843994, + "eval_rewards/rejected": -1.2792433500289917, + "eval_runtime": 197.0952, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.074, + "step": 2660 + }, + { + "epoch": 0.35, + "learning_rate": 4.111339826476725e-06, + "logits/chosen": -2.7120726108551025, + "logits/rejected": -2.7115330696105957, + "logps/chosen": -393.4505310058594, + "logps/rejected": -416.03753662109375, + "loss": 0.6223, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.0049831867218018, + "rewards/margins": 0.28646284341812134, + "rewards/rejected": -1.2914460897445679, + "step": 2670 + }, + { + "epoch": 0.35, + "eval_logits/chosen": -2.58608341217041, + "eval_logits/rejected": -2.6061620712280273, + "eval_logps/chosen": -433.83221435546875, + "eval_logps/rejected": -431.00537109375, + "eval_loss": 0.6331284046173096, + "eval_rewards/accuracies": 0.6340000033378601, + "eval_rewards/chosen": -1.010827660560608, + "eval_rewards/margins": 0.2881257236003876, + "eval_rewards/rejected": -1.2989535331726074, + "eval_runtime": 196.8111, + "eval_samples_per_second": 10.162, + "eval_steps_per_second": 5.081, + "step": 2670 + }, + { + "epoch": 0.35, + "learning_rate": 4.102589835560572e-06, + "logits/chosen": -2.7702507972717285, + "logits/rejected": -2.7156424522399902, + "logps/chosen": -487.67431640625, + "logps/rejected": -437.3570251464844, + "loss": 0.6479, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.9811779856681824, + "rewards/margins": 0.17895013093948364, + "rewards/rejected": -1.1601279973983765, + "step": 2680 + }, + { + "epoch": 0.35, + "eval_logits/chosen": -2.5870747566223145, + "eval_logits/rejected": -2.607055902481079, + "eval_logps/chosen": -432.8543701171875, + "eval_logps/rejected": -430.0517272949219, + "eval_loss": 0.6314911842346191, + "eval_rewards/accuracies": 0.6365000009536743, + "eval_rewards/chosen": -1.0010497570037842, + "eval_rewards/margins": 0.2883668541908264, + "eval_rewards/rejected": -1.2894165515899658, + "eval_runtime": 196.9253, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 2680 + }, + { + "epoch": 0.35, + "learning_rate": 4.09380639052231e-06, + "logits/chosen": -2.758643627166748, + "logits/rejected": -2.779642105102539, + "logps/chosen": -446.88006591796875, + "logps/rejected": -497.0462951660156, + "loss": 0.5703, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.9354826211929321, + "rewards/margins": 0.4081154763698578, + "rewards/rejected": -1.3435981273651123, + "step": 2690 + }, + { + "epoch": 0.35, + "eval_logits/chosen": -2.585550308227539, + "eval_logits/rejected": -2.605078935623169, + "eval_logps/chosen": -436.9498291015625, + "eval_logps/rejected": -434.83868408203125, + "eval_loss": 0.6306910514831543, + "eval_rewards/accuracies": 0.6365000009536743, + "eval_rewards/chosen": -1.0420043468475342, + "eval_rewards/margins": 0.2952825129032135, + "eval_rewards/rejected": -1.3372868299484253, + "eval_runtime": 197.3558, + "eval_samples_per_second": 10.134, + "eval_steps_per_second": 5.067, + "step": 2690 + }, + { + "epoch": 0.35, + "learning_rate": 4.084989674716679e-06, + "logits/chosen": -2.7644388675689697, + "logits/rejected": -2.6968836784362793, + "logps/chosen": -450.021484375, + "logps/rejected": -462.6543884277344, + "loss": 0.6217, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.0556700229644775, + "rewards/margins": 0.2913525700569153, + "rewards/rejected": -1.3470226526260376, + "step": 2700 + }, + { + "epoch": 0.35, + "eval_logits/chosen": -2.588069200515747, + "eval_logits/rejected": -2.607356309890747, + "eval_logps/chosen": -439.2960510253906, + "eval_logps/rejected": -437.5450439453125, + "eval_loss": 0.6307061910629272, + "eval_rewards/accuracies": 0.6334999799728394, + "eval_rewards/chosen": -1.065466284751892, + "eval_rewards/margins": 0.2988835871219635, + "eval_rewards/rejected": -1.3643499612808228, + "eval_runtime": 197.153, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 2700 + }, + { + "epoch": 0.35, + "learning_rate": 4.076139872192949e-06, + "logits/chosen": -2.795623302459717, + "logits/rejected": -2.7657852172851562, + "logps/chosen": -493.7920837402344, + "logps/rejected": -456.6087951660156, + "loss": 0.6502, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.1028454303741455, + "rewards/margins": 0.2741045653820038, + "rewards/rejected": -1.3769499063491821, + "step": 2710 + }, + { + "epoch": 0.35, + "eval_logits/chosen": -2.6020870208740234, + "eval_logits/rejected": -2.621488332748413, + "eval_logps/chosen": -431.4540710449219, + "eval_logps/rejected": -428.3634033203125, + "eval_loss": 0.6275376081466675, + "eval_rewards/accuracies": 0.6349999904632568, + "eval_rewards/chosen": -0.9870465993881226, + "eval_rewards/margins": 0.28548726439476013, + "eval_rewards/rejected": -1.2725337743759155, + "eval_runtime": 197.0663, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.074, + "step": 2710 + }, + { + "epoch": 0.36, + "learning_rate": 4.067257167691074e-06, + "logits/chosen": -2.77093768119812, + "logits/rejected": -2.799267292022705, + "logps/chosen": -462.2300720214844, + "logps/rejected": -478.0562438964844, + "loss": 0.6013, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.9543863534927368, + "rewards/margins": 0.37834474444389343, + "rewards/rejected": -1.332731008529663, + "step": 2720 + }, + { + "epoch": 0.36, + "eval_logits/chosen": -2.616654872894287, + "eval_logits/rejected": -2.635721445083618, + "eval_logps/chosen": -424.5511169433594, + "eval_logps/rejected": -420.2796325683594, + "eval_loss": 0.6254580020904541, + "eval_rewards/accuracies": 0.6380000114440918, + "eval_rewards/chosen": -0.9180174469947815, + "eval_rewards/margins": 0.27367839217185974, + "eval_rewards/rejected": -1.1916959285736084, + "eval_runtime": 197.111, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.073, + "step": 2720 + }, + { + "epoch": 0.36, + "learning_rate": 4.05834174663784e-06, + "logits/chosen": -2.8080220222473145, + "logits/rejected": -2.8545610904693604, + "logps/chosen": -444.73626708984375, + "logps/rejected": -422.92510986328125, + "loss": 0.643, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.9885362386703491, + "rewards/margins": 0.29533851146698, + "rewards/rejected": -1.283874750137329, + "step": 2730 + }, + { + "epoch": 0.36, + "eval_logits/chosen": -2.6182029247283936, + "eval_logits/rejected": -2.637312173843384, + "eval_logps/chosen": -424.8026123046875, + "eval_logps/rejected": -420.6578674316406, + "eval_loss": 0.6249555945396423, + "eval_rewards/accuracies": 0.6399999856948853, + "eval_rewards/chosen": -0.9205319881439209, + "eval_rewards/margins": 0.27494680881500244, + "eval_rewards/rejected": -1.1954787969589233, + "eval_runtime": 196.5399, + "eval_samples_per_second": 10.176, + "eval_steps_per_second": 5.088, + "step": 2730 + }, + { + "epoch": 0.36, + "learning_rate": 4.0493937951429895e-06, + "logits/chosen": -2.8887510299682617, + "logits/rejected": -2.891409397125244, + "logps/chosen": -423.8211364746094, + "logps/rejected": -397.92938232421875, + "loss": 0.6072, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.8922034502029419, + "rewards/margins": 0.3024117350578308, + "rewards/rejected": -1.194615125656128, + "step": 2740 + }, + { + "epoch": 0.36, + "eval_logits/chosen": -2.6147515773773193, + "eval_logits/rejected": -2.6340131759643555, + "eval_logps/chosen": -425.80096435546875, + "eval_logps/rejected": -421.8180236816406, + "eval_loss": 0.6246365308761597, + "eval_rewards/accuracies": 0.640999972820282, + "eval_rewards/chosen": -0.930515468120575, + "eval_rewards/margins": 0.2765650153160095, + "eval_rewards/rejected": -1.207080364227295, + "eval_runtime": 196.9303, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 2740 + }, + { + "epoch": 0.36, + "learning_rate": 4.040413499995343e-06, + "logits/chosen": -2.8133509159088135, + "logits/rejected": -2.780090570449829, + "logps/chosen": -462.28973388671875, + "logps/rejected": -461.85150146484375, + "loss": 0.6327, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.9306343197822571, + "rewards/margins": 0.24961963295936584, + "rewards/rejected": -1.1802538633346558, + "step": 2750 + }, + { + "epoch": 0.36, + "eval_logits/chosen": -2.6044232845306396, + "eval_logits/rejected": -2.624067783355713, + "eval_logps/chosen": -425.9725036621094, + "eval_logps/rejected": -421.98956298828125, + "eval_loss": 0.6255431175231934, + "eval_rewards/accuracies": 0.6384999752044678, + "eval_rewards/chosen": -0.9322309494018555, + "eval_rewards/margins": 0.2765646278858185, + "eval_rewards/rejected": -1.208795428276062, + "eval_runtime": 196.8045, + "eval_samples_per_second": 10.162, + "eval_steps_per_second": 5.081, + "step": 2750 + }, + { + "epoch": 0.36, + "learning_rate": 4.031401048658892e-06, + "logits/chosen": -2.771268844604492, + "logits/rejected": -2.744429111480713, + "logps/chosen": -424.1585388183594, + "logps/rejected": -430.1105041503906, + "loss": 0.5996, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.855022132396698, + "rewards/margins": 0.3389972746372223, + "rewards/rejected": -1.1940194368362427, + "step": 2760 + }, + { + "epoch": 0.36, + "eval_logits/chosen": -2.6032989025115967, + "eval_logits/rejected": -2.622894048690796, + "eval_logps/chosen": -422.4579772949219, + "eval_logps/rejected": -417.8634948730469, + "eval_loss": 0.625076949596405, + "eval_rewards/accuracies": 0.640999972820282, + "eval_rewards/chosen": -0.8970851898193359, + "eval_rewards/margins": 0.27044978737831116, + "eval_rewards/rejected": -1.1675349473953247, + "eval_runtime": 196.8433, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 2760 + }, + { + "epoch": 0.36, + "learning_rate": 4.022356629268894e-06, + "logits/chosen": -2.7860825061798096, + "logits/rejected": -2.7870800495147705, + "logps/chosen": -439.6736755371094, + "logps/rejected": -396.2608947753906, + "loss": 0.7056, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.9059756994247437, + "rewards/margins": 0.08050543814897537, + "rewards/rejected": -0.9864810109138489, + "step": 2770 + }, + { + "epoch": 0.36, + "eval_logits/chosen": -2.6097211837768555, + "eval_logits/rejected": -2.628533124923706, + "eval_logps/chosen": -416.4322814941406, + "eval_logps/rejected": -410.8010559082031, + "eval_loss": 0.6238117218017578, + "eval_rewards/accuracies": 0.640999972820282, + "eval_rewards/chosen": -0.8368287086486816, + "eval_rewards/margins": 0.2600819170475006, + "eval_rewards/rejected": -1.0969105958938599, + "eval_runtime": 196.8627, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.08, + "step": 2770 + }, + { + "epoch": 0.36, + "learning_rate": 4.013280430627936e-06, + "logits/chosen": -2.759000301361084, + "logits/rejected": -2.755174160003662, + "logps/chosen": -378.8783264160156, + "logps/rejected": -374.7305603027344, + "loss": 0.606, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.713367760181427, + "rewards/margins": 0.2676360309123993, + "rewards/rejected": -0.9810037612915039, + "step": 2780 + }, + { + "epoch": 0.36, + "eval_logits/chosen": -2.599520444869995, + "eval_logits/rejected": -2.6183393001556396, + "eval_logps/chosen": -417.6534118652344, + "eval_logps/rejected": -412.40203857421875, + "eval_loss": 0.6237169504165649, + "eval_rewards/accuracies": 0.6399999856948853, + "eval_rewards/chosen": -0.849040150642395, + "eval_rewards/margins": 0.26388019323349, + "eval_rewards/rejected": -1.1129202842712402, + "eval_runtime": 196.8922, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 2780 + }, + { + "epoch": 0.37, + "learning_rate": 4.004172642202002e-06, + "logits/chosen": -2.7675366401672363, + "logits/rejected": -2.753002643585205, + "logps/chosen": -393.2950439453125, + "logps/rejected": -378.9779052734375, + "loss": 0.5751, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.8302526473999023, + "rewards/margins": 0.34783655405044556, + "rewards/rejected": -1.1780892610549927, + "step": 2790 + }, + { + "epoch": 0.37, + "eval_logits/chosen": -2.59600830078125, + "eval_logits/rejected": -2.6151397228240967, + "eval_logps/chosen": -416.169677734375, + "eval_logps/rejected": -410.8311767578125, + "eval_loss": 0.6246668100357056, + "eval_rewards/accuracies": 0.6455000042915344, + "eval_rewards/chosen": -0.8342025876045227, + "eval_rewards/margins": 0.2630092203617096, + "eval_rewards/rejected": -1.0972118377685547, + "eval_runtime": 197.1998, + "eval_samples_per_second": 10.142, + "eval_steps_per_second": 5.071, + "step": 2790 + }, + { + "epoch": 0.37, + "learning_rate": 3.995033454116512e-06, + "logits/chosen": -2.806318759918213, + "logits/rejected": -2.800372362136841, + "logps/chosen": -448.93524169921875, + "logps/rejected": -423.07574462890625, + "loss": 0.6504, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.8231765627861023, + "rewards/margins": 0.2010866403579712, + "rewards/rejected": -1.0242632627487183, + "step": 2800 + }, + { + "epoch": 0.37, + "eval_logits/chosen": -2.599107027053833, + "eval_logits/rejected": -2.6187636852264404, + "eval_logps/chosen": -414.1667785644531, + "eval_logps/rejected": -408.644287109375, + "eval_loss": 0.6250008344650269, + "eval_rewards/accuracies": 0.6424999833106995, + "eval_rewards/chosen": -0.8141741156578064, + "eval_rewards/margins": 0.26116856932640076, + "eval_rewards/rejected": -1.0753426551818848, + "eval_runtime": 196.7704, + "eval_samples_per_second": 10.164, + "eval_steps_per_second": 5.082, + "step": 2800 + }, + { + "epoch": 0.37, + "learning_rate": 3.985863057152355e-06, + "logits/chosen": -2.734070301055908, + "logits/rejected": -2.781536817550659, + "logps/chosen": -441.91015625, + "logps/rejected": -449.7099609375, + "loss": 0.5513, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.689423143863678, + "rewards/margins": 0.409343421459198, + "rewards/rejected": -1.098766565322876, + "step": 2810 + }, + { + "epoch": 0.37, + "eval_logits/chosen": -2.59348201751709, + "eval_logits/rejected": -2.6137006282806396, + "eval_logps/chosen": -421.896728515625, + "eval_logps/rejected": -417.6894226074219, + "eval_loss": 0.625147819519043, + "eval_rewards/accuracies": 0.6474999785423279, + "eval_rewards/chosen": -0.8914732933044434, + "eval_rewards/margins": 0.27432069182395935, + "eval_rewards/rejected": -1.1657938957214355, + "eval_runtime": 196.9116, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.078, + "step": 2810 + }, + { + "epoch": 0.37, + "learning_rate": 3.976661642741908e-06, + "logits/chosen": -2.7606282234191895, + "logits/rejected": -2.7800581455230713, + "logps/chosen": -410.58966064453125, + "logps/rejected": -452.03851318359375, + "loss": 0.5198, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.8148131370544434, + "rewards/margins": 0.4949001669883728, + "rewards/rejected": -1.3097132444381714, + "step": 2820 + }, + { + "epoch": 0.37, + "eval_logits/chosen": -2.578408718109131, + "eval_logits/rejected": -2.5996177196502686, + "eval_logps/chosen": -438.55657958984375, + "eval_logps/rejected": -437.1532287597656, + "eval_loss": 0.6287716627120972, + "eval_rewards/accuracies": 0.6365000009536743, + "eval_rewards/chosen": -1.0580713748931885, + "eval_rewards/margins": 0.30236053466796875, + "eval_rewards/rejected": -1.3604320287704468, + "eval_runtime": 197.0899, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 2820 + }, + { + "epoch": 0.37, + "learning_rate": 3.967429402965035e-06, + "logits/chosen": -2.628810405731201, + "logits/rejected": -2.6278045177459717, + "logps/chosen": -470.0814514160156, + "logps/rejected": -483.7037658691406, + "loss": 0.5981, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.0495673418045044, + "rewards/margins": 0.3852699398994446, + "rewards/rejected": -1.4348372220993042, + "step": 2830 + }, + { + "epoch": 0.37, + "eval_logits/chosen": -2.5737946033477783, + "eval_logits/rejected": -2.595820426940918, + "eval_logps/chosen": -449.00238037109375, + "eval_logps/rejected": -449.2751770019531, + "eval_loss": 0.6319224834442139, + "eval_rewards/accuracies": 0.6355000138282776, + "eval_rewards/chosen": -1.162529468536377, + "eval_rewards/margins": 0.3191223740577698, + "eval_rewards/rejected": -1.481651782989502, + "eval_runtime": 196.8504, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 2830 + }, + { + "epoch": 0.37, + "learning_rate": 3.958166530545085e-06, + "logits/chosen": -2.759307861328125, + "logits/rejected": -2.7708866596221924, + "logps/chosen": -453.6480407714844, + "logps/rejected": -466.7681579589844, + "loss": 0.6637, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.2561802864074707, + "rewards/margins": 0.22925932705402374, + "rewards/rejected": -1.4854395389556885, + "step": 2840 + }, + { + "epoch": 0.37, + "eval_logits/chosen": -2.5727250576019287, + "eval_logits/rejected": -2.594754695892334, + "eval_logps/chosen": -452.2230529785156, + "eval_logps/rejected": -453.07086181640625, + "eval_loss": 0.6315993666648865, + "eval_rewards/accuracies": 0.6334999799728394, + "eval_rewards/chosen": -1.1947364807128906, + "eval_rewards/margins": 0.3248724937438965, + "eval_rewards/rejected": -1.519608974456787, + "eval_runtime": 197.1339, + "eval_samples_per_second": 10.145, + "eval_steps_per_second": 5.073, + "step": 2840 + }, + { + "epoch": 0.37, + "learning_rate": 3.948873218844863e-06, + "logits/chosen": -2.6876654624938965, + "logits/rejected": -2.7408440113067627, + "logps/chosen": -378.585693359375, + "logps/rejected": -445.4602966308594, + "loss": 0.6351, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.1722004413604736, + "rewards/margins": 0.27464979887008667, + "rewards/rejected": -1.446850299835205, + "step": 2850 + }, + { + "epoch": 0.37, + "eval_logits/chosen": -2.570188283920288, + "eval_logits/rejected": -2.592709541320801, + "eval_logps/chosen": -452.1654357910156, + "eval_logps/rejected": -453.17291259765625, + "eval_loss": 0.6327278017997742, + "eval_rewards/accuracies": 0.6349999904632568, + "eval_rewards/chosen": -1.1941603422164917, + "eval_rewards/margins": 0.32646846771240234, + "eval_rewards/rejected": -1.520628809928894, + "eval_runtime": 197.202, + "eval_samples_per_second": 10.142, + "eval_steps_per_second": 5.071, + "step": 2850 + }, + { + "epoch": 0.37, + "learning_rate": 3.939549661862592e-06, + "logits/chosen": -2.680032253265381, + "logits/rejected": -2.698355197906494, + "logps/chosen": -455.81622314453125, + "logps/rejected": -460.41375732421875, + "loss": 0.6009, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.1447904109954834, + "rewards/margins": 0.4577345848083496, + "rewards/rejected": -1.602524757385254, + "step": 2860 + }, + { + "epoch": 0.37, + "eval_logits/chosen": -2.5797622203826904, + "eval_logits/rejected": -2.6020236015319824, + "eval_logps/chosen": -452.1235656738281, + "eval_logps/rejected": -453.2584228515625, + "eval_loss": 0.6323604583740234, + "eval_rewards/accuracies": 0.6370000243186951, + "eval_rewards/chosen": -1.1937412023544312, + "eval_rewards/margins": 0.3277431130409241, + "eval_rewards/rejected": -1.5214842557907104, + "eval_runtime": 196.9343, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 2860 + }, + { + "epoch": 0.38, + "learning_rate": 3.930196054227871e-06, + "logits/chosen": -2.7388813495635986, + "logits/rejected": -2.705418586730957, + "logps/chosen": -421.400634765625, + "logps/rejected": -426.3876037597656, + "loss": 0.6586, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -1.195313572883606, + "rewards/margins": 0.27158278226852417, + "rewards/rejected": -1.4668962955474854, + "step": 2870 + }, + { + "epoch": 0.38, + "eval_logits/chosen": -2.592318058013916, + "eval_logits/rejected": -2.6147069931030273, + "eval_logps/chosen": -444.8150634765625, + "eval_logps/rejected": -444.9077453613281, + "eval_loss": 0.6291281580924988, + "eval_rewards/accuracies": 0.6420000195503235, + "eval_rewards/chosen": -1.1206568479537964, + "eval_rewards/margins": 0.3173206150531769, + "eval_rewards/rejected": -1.4379774332046509, + "eval_runtime": 197.0659, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.074, + "step": 2870 + }, + { + "epoch": 0.38, + "learning_rate": 3.920812591197604e-06, + "logits/chosen": -2.73275089263916, + "logits/rejected": -2.720738410949707, + "logps/chosen": -427.51416015625, + "logps/rejected": -424.65313720703125, + "loss": 0.5718, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.0417402982711792, + "rewards/margins": 0.4426051080226898, + "rewards/rejected": -1.484345555305481, + "step": 2880 + }, + { + "epoch": 0.38, + "eval_logits/chosen": -2.5997185707092285, + "eval_logits/rejected": -2.622008800506592, + "eval_logps/chosen": -433.97705078125, + "eval_logps/rejected": -432.5858154296875, + "eval_loss": 0.6275606155395508, + "eval_rewards/accuracies": 0.6430000066757202, + "eval_rewards/chosen": -1.012276291847229, + "eval_rewards/margins": 0.3024812638759613, + "eval_rewards/rejected": -1.3147575855255127, + "eval_runtime": 196.8295, + "eval_samples_per_second": 10.161, + "eval_steps_per_second": 5.081, + "step": 2880 + }, + { + "epoch": 0.38, + "learning_rate": 3.9113994686519305e-06, + "logits/chosen": -2.7557740211486816, + "logits/rejected": -2.757719039916992, + "logps/chosen": -435.67156982421875, + "logps/rejected": -442.53753662109375, + "loss": 0.6098, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.9510415196418762, + "rewards/margins": 0.32140472531318665, + "rewards/rejected": -1.2724463939666748, + "step": 2890 + }, + { + "epoch": 0.38, + "eval_logits/chosen": -2.592928171157837, + "eval_logits/rejected": -2.6148271560668945, + "eval_logps/chosen": -428.5566101074219, + "eval_logps/rejected": -426.573486328125, + "eval_loss": 0.6265643239021301, + "eval_rewards/accuracies": 0.6464999914169312, + "eval_rewards/chosen": -0.9580718278884888, + "eval_rewards/margins": 0.2965623438358307, + "eval_rewards/rejected": -1.2546342611312866, + "eval_runtime": 196.8971, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 2890 + }, + { + "epoch": 0.38, + "learning_rate": 3.90195688309013e-06, + "logits/chosen": -2.7411415576934814, + "logits/rejected": -2.716850757598877, + "logps/chosen": -407.24639892578125, + "logps/rejected": -394.56671142578125, + "loss": 0.6727, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.9749676585197449, + "rewards/margins": 0.18547670543193817, + "rewards/rejected": -1.1604443788528442, + "step": 2900 + }, + { + "epoch": 0.38, + "eval_logits/chosen": -2.587193250656128, + "eval_logits/rejected": -2.6085598468780518, + "eval_logps/chosen": -424.8428955078125, + "eval_logps/rejected": -422.52862548828125, + "eval_loss": 0.6264001131057739, + "eval_rewards/accuracies": 0.6495000123977661, + "eval_rewards/chosen": -0.9209350347518921, + "eval_rewards/margins": 0.2932513654232025, + "eval_rewards/rejected": -1.2141865491867065, + "eval_runtime": 196.8371, + "eval_samples_per_second": 10.161, + "eval_steps_per_second": 5.08, + "step": 2900 + }, + { + "epoch": 0.38, + "learning_rate": 3.892485031626527e-06, + "logits/chosen": -2.7525322437286377, + "logits/rejected": -2.740018129348755, + "logps/chosen": -405.2106018066406, + "logps/rejected": -415.08624267578125, + "loss": 0.6066, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.8530766367912292, + "rewards/margins": 0.3423798978328705, + "rewards/rejected": -1.1954563856124878, + "step": 2910 + }, + { + "epoch": 0.38, + "eval_logits/chosen": -2.585651159286499, + "eval_logits/rejected": -2.606193780899048, + "eval_logps/chosen": -422.63525390625, + "eval_logps/rejected": -420.05523681640625, + "eval_loss": 0.624978244304657, + "eval_rewards/accuracies": 0.6489999890327454, + "eval_rewards/chosen": -0.8988585472106934, + "eval_rewards/margins": 0.2905937731266022, + "eval_rewards/rejected": -1.1894524097442627, + "eval_runtime": 196.9551, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 2910 + }, + { + "epoch": 0.38, + "learning_rate": 3.882984111986371e-06, + "logits/chosen": -2.739992141723633, + "logits/rejected": -2.7450668811798096, + "logps/chosen": -434.47314453125, + "logps/rejected": -429.2943420410156, + "loss": 0.6094, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.8825608491897583, + "rewards/margins": 0.3956514298915863, + "rewards/rejected": -1.2782121896743774, + "step": 2920 + }, + { + "epoch": 0.38, + "eval_logits/chosen": -2.5807759761810303, + "eval_logits/rejected": -2.6009206771850586, + "eval_logps/chosen": -420.12359619140625, + "eval_logps/rejected": -417.17828369140625, + "eval_loss": 0.6240187883377075, + "eval_rewards/accuracies": 0.6480000019073486, + "eval_rewards/chosen": -0.8737419247627258, + "eval_rewards/margins": 0.28694066405296326, + "eval_rewards/rejected": -1.1606824398040771, + "eval_runtime": 196.9637, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 2920 + }, + { + "epoch": 0.38, + "learning_rate": 3.873454322501711e-06, + "logits/chosen": -2.7816436290740967, + "logits/rejected": -2.789374589920044, + "logps/chosen": -427.786376953125, + "logps/rejected": -419.85321044921875, + "loss": 0.5938, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.7983767986297607, + "rewards/margins": 0.3398052752017975, + "rewards/rejected": -1.1381819248199463, + "step": 2930 + }, + { + "epoch": 0.38, + "eval_logits/chosen": -2.579239845275879, + "eval_logits/rejected": -2.5991451740264893, + "eval_logps/chosen": -418.7933654785156, + "eval_logps/rejected": -415.671630859375, + "eval_loss": 0.6238669753074646, + "eval_rewards/accuracies": 0.6480000019073486, + "eval_rewards/chosen": -0.8604398369789124, + "eval_rewards/margins": 0.28517666459083557, + "eval_rewards/rejected": -1.1456164121627808, + "eval_runtime": 197.282, + "eval_samples_per_second": 10.138, + "eval_steps_per_second": 5.069, + "step": 2930 + }, + { + "epoch": 0.38, + "learning_rate": 3.863895862107255e-06, + "logits/chosen": -2.819079637527466, + "logits/rejected": -2.8470709323883057, + "logps/chosen": -407.00958251953125, + "logps/rejected": -451.1863708496094, + "loss": 0.6039, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.8009134531021118, + "rewards/margins": 0.3192467987537384, + "rewards/rejected": -1.1201602220535278, + "step": 2940 + }, + { + "epoch": 0.38, + "eval_logits/chosen": -2.5733230113983154, + "eval_logits/rejected": -2.5929837226867676, + "eval_logps/chosen": -424.0008239746094, + "eval_logps/rejected": -421.6430969238281, + "eval_loss": 0.6243709921836853, + "eval_rewards/accuracies": 0.6470000147819519, + "eval_rewards/chosen": -0.9125141501426697, + "eval_rewards/margins": 0.2928166389465332, + "eval_rewards/rejected": -1.2053308486938477, + "eval_runtime": 197.0251, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 2940 + }, + { + "epoch": 0.39, + "learning_rate": 3.854308930336216e-06, + "logits/chosen": -2.753868579864502, + "logits/rejected": -2.7251639366149902, + "logps/chosen": -478.91741943359375, + "logps/rejected": -445.16241455078125, + "loss": 0.5952, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.8964638710021973, + "rewards/margins": 0.3466527760028839, + "rewards/rejected": -1.2431166172027588, + "step": 2950 + }, + { + "epoch": 0.39, + "eval_logits/chosen": -2.5684099197387695, + "eval_logits/rejected": -2.5877881050109863, + "eval_logps/chosen": -424.83856201171875, + "eval_logps/rejected": -422.58837890625, + "eval_loss": 0.6245684027671814, + "eval_rewards/accuracies": 0.6445000171661377, + "eval_rewards/chosen": -0.9208914041519165, + "eval_rewards/margins": 0.29389217495918274, + "eval_rewards/rejected": -1.2147835493087769, + "eval_runtime": 196.9168, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.078, + "step": 2950 + }, + { + "epoch": 0.39, + "learning_rate": 3.844693727316151e-06, + "logits/chosen": -2.7385358810424805, + "logits/rejected": -2.7280914783477783, + "logps/chosen": -437.2637634277344, + "logps/rejected": -414.7500915527344, + "loss": 0.6394, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.9794430732727051, + "rewards/margins": 0.25574856996536255, + "rewards/rejected": -1.2351915836334229, + "step": 2960 + }, + { + "epoch": 0.39, + "eval_logits/chosen": -2.565891981124878, + "eval_logits/rejected": -2.584840774536133, + "eval_logps/chosen": -423.7319641113281, + "eval_logps/rejected": -421.17254638671875, + "eval_loss": 0.6237717270851135, + "eval_rewards/accuracies": 0.6474999785423279, + "eval_rewards/chosen": -0.9098256826400757, + "eval_rewards/margins": 0.2907992899417877, + "eval_rewards/rejected": -1.2006248235702515, + "eval_runtime": 196.935, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 2960 + }, + { + "epoch": 0.39, + "learning_rate": 3.835050453764779e-06, + "logits/chosen": -2.671020746231079, + "logits/rejected": -2.7046775817871094, + "logps/chosen": -383.79461669921875, + "logps/rejected": -425.920654296875, + "loss": 0.509, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.8011902570724487, + "rewards/margins": 0.6181550621986389, + "rewards/rejected": -1.4193452596664429, + "step": 2970 + }, + { + "epoch": 0.39, + "eval_logits/chosen": -2.560662269592285, + "eval_logits/rejected": -2.579688787460327, + "eval_logps/chosen": -426.69012451171875, + "eval_logps/rejected": -424.6265869140625, + "eval_loss": 0.6250145435333252, + "eval_rewards/accuracies": 0.6489999890327454, + "eval_rewards/chosen": -0.9394070506095886, + "eval_rewards/margins": 0.2957586944103241, + "eval_rewards/rejected": -1.2351657152175903, + "eval_runtime": 196.9601, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 2970 + }, + { + "epoch": 0.39, + "learning_rate": 3.825379310985792e-06, + "logits/chosen": -2.7324655055999756, + "logits/rejected": -2.7066054344177246, + "logps/chosen": -405.8730163574219, + "logps/rejected": -424.6570739746094, + "loss": 0.6274, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.9839998483657837, + "rewards/margins": 0.273231565952301, + "rewards/rejected": -1.2572312355041504, + "step": 2980 + }, + { + "epoch": 0.39, + "eval_logits/chosen": -2.5506222248077393, + "eval_logits/rejected": -2.570014715194702, + "eval_logps/chosen": -430.77142333984375, + "eval_logps/rejected": -429.4747314453125, + "eval_loss": 0.6260868310928345, + "eval_rewards/accuracies": 0.6495000123977661, + "eval_rewards/chosen": -0.9802199602127075, + "eval_rewards/margins": 0.3034266531467438, + "eval_rewards/rejected": -1.283646583557129, + "eval_runtime": 196.9949, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.076, + "step": 2980 + }, + { + "epoch": 0.39, + "learning_rate": 3.815680500864651e-06, + "logits/chosen": -2.7649083137512207, + "logits/rejected": -2.783748149871826, + "logps/chosen": -464.8194885253906, + "logps/rejected": -430.9786071777344, + "loss": 0.6132, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.8606418371200562, + "rewards/margins": 0.2772321403026581, + "rewards/rejected": -1.1378740072250366, + "step": 2990 + }, + { + "epoch": 0.39, + "eval_logits/chosen": -2.546297073364258, + "eval_logits/rejected": -2.566033124923706, + "eval_logps/chosen": -433.58160400390625, + "eval_logps/rejected": -432.9014587402344, + "eval_loss": 0.6257321834564209, + "eval_rewards/accuracies": 0.6464999914169312, + "eval_rewards/chosen": -1.00832200050354, + "eval_rewards/margins": 0.30959272384643555, + "eval_rewards/rejected": -1.3179147243499756, + "eval_runtime": 196.9553, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 2990 + }, + { + "epoch": 0.39, + "learning_rate": 3.80595422586438e-06, + "logits/chosen": -2.7633798122406006, + "logits/rejected": -2.7644972801208496, + "logps/chosen": -490.32781982421875, + "logps/rejected": -421.77471923828125, + "loss": 0.6322, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.9230520129203796, + "rewards/margins": 0.32078996300697327, + "rewards/rejected": -1.2438418865203857, + "step": 3000 + }, + { + "epoch": 0.39, + "eval_logits/chosen": -2.5444023609161377, + "eval_logits/rejected": -2.564011335372925, + "eval_logps/chosen": -433.4969787597656, + "eval_logps/rejected": -432.7070007324219, + "eval_loss": 0.6249431371688843, + "eval_rewards/accuracies": 0.6504999995231628, + "eval_rewards/chosen": -1.0074756145477295, + "eval_rewards/margins": 0.30849388241767883, + "eval_rewards/rejected": -1.315969467163086, + "eval_runtime": 196.9191, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 3000 + }, + { + "epoch": 0.39, + "learning_rate": 3.7962006890213266e-06, + "logits/chosen": -2.6365857124328613, + "logits/rejected": -2.5725252628326416, + "logps/chosen": -401.45556640625, + "logps/rejected": -403.39056396484375, + "loss": 0.6969, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -1.04611074924469, + "rewards/margins": 0.12471544742584229, + "rewards/rejected": -1.1708260774612427, + "step": 3010 + }, + { + "epoch": 0.39, + "eval_logits/chosen": -2.5364789962768555, + "eval_logits/rejected": -2.555938482284546, + "eval_logps/chosen": -430.24176025390625, + "eval_logps/rejected": -428.698486328125, + "eval_loss": 0.6241666674613953, + "eval_rewards/accuracies": 0.6504999995231628, + "eval_rewards/chosen": -0.9749231934547424, + "eval_rewards/margins": 0.30096182227134705, + "eval_rewards/rejected": -1.275884985923767, + "eval_runtime": 196.9515, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 3010 + }, + { + "epoch": 0.4, + "learning_rate": 3.7864200939409336e-06, + "logits/chosen": -2.708780527114868, + "logits/rejected": -2.6882429122924805, + "logps/chosen": -422.79168701171875, + "logps/rejected": -405.7910461425781, + "loss": 0.632, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.8642172813415527, + "rewards/margins": 0.2687808573246002, + "rewards/rejected": -1.1329978704452515, + "step": 3020 + }, + { + "epoch": 0.4, + "eval_logits/chosen": -2.543612480163574, + "eval_logits/rejected": -2.562998056411743, + "eval_logps/chosen": -426.5962219238281, + "eval_logps/rejected": -424.1770324707031, + "eval_loss": 0.6225207448005676, + "eval_rewards/accuracies": 0.6495000123977661, + "eval_rewards/chosen": -0.9384684562683105, + "eval_rewards/margins": 0.2922017276287079, + "eval_rewards/rejected": -1.2306702136993408, + "eval_runtime": 197.1525, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 3020 + }, + { + "epoch": 0.4, + "learning_rate": 3.7766126447934857e-06, + "logits/chosen": -2.721001148223877, + "logits/rejected": -2.756192684173584, + "logps/chosen": -382.03985595703125, + "logps/rejected": -398.6980895996094, + "loss": 0.6207, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.9074760675430298, + "rewards/margins": 0.2535129487514496, + "rewards/rejected": -1.1609890460968018, + "step": 3030 + }, + { + "epoch": 0.4, + "eval_logits/chosen": -2.54727840423584, + "eval_logits/rejected": -2.5665431022644043, + "eval_logps/chosen": -426.14080810546875, + "eval_logps/rejected": -423.4004211425781, + "eval_loss": 0.6219916939735413, + "eval_rewards/accuracies": 0.6510000228881836, + "eval_rewards/chosen": -0.9339138269424438, + "eval_rewards/margins": 0.28898999094963074, + "eval_rewards/rejected": -1.2229039669036865, + "eval_runtime": 197.0328, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 3030 + }, + { + "epoch": 0.4, + "learning_rate": 3.766778546309847e-06, + "logits/chosen": -2.783926010131836, + "logits/rejected": -2.7826411724090576, + "logps/chosen": -457.7347106933594, + "logps/rejected": -378.9781188964844, + "loss": 0.6059, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.8478718996047974, + "rewards/margins": 0.3275406062602997, + "rewards/rejected": -1.1754125356674194, + "step": 3040 + }, + { + "epoch": 0.4, + "eval_logits/chosen": -2.5473427772521973, + "eval_logits/rejected": -2.5660293102264404, + "eval_logps/chosen": -423.0438232421875, + "eval_logps/rejected": -419.7774353027344, + "eval_loss": 0.6212862730026245, + "eval_rewards/accuracies": 0.6554999947547913, + "eval_rewards/chosen": -0.9029442071914673, + "eval_rewards/margins": 0.2837299108505249, + "eval_rewards/rejected": -1.1866742372512817, + "eval_runtime": 197.0258, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 3040 + }, + { + "epoch": 0.4, + "learning_rate": 3.7569180037771868e-06, + "logits/chosen": -2.7684082984924316, + "logits/rejected": -2.805574417114258, + "logps/chosen": -416.99114990234375, + "logps/rejected": -437.90399169921875, + "loss": 0.6284, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.9409763216972351, + "rewards/margins": 0.28491485118865967, + "rewards/rejected": -1.22589111328125, + "step": 3050 + }, + { + "epoch": 0.4, + "eval_logits/chosen": -2.548220634460449, + "eval_logits/rejected": -2.567086696624756, + "eval_logps/chosen": -421.3135070800781, + "eval_logps/rejected": -417.7491760253906, + "eval_loss": 0.621475875377655, + "eval_rewards/accuracies": 0.6539999842643738, + "eval_rewards/chosen": -0.885640561580658, + "eval_rewards/margins": 0.28075098991394043, + "eval_rewards/rejected": -1.1663916110992432, + "eval_runtime": 197.2358, + "eval_samples_per_second": 10.14, + "eval_steps_per_second": 5.07, + "step": 3050 + }, + { + "epoch": 0.4, + "learning_rate": 3.7470312230346955e-06, + "logits/chosen": -2.6531074047088623, + "logits/rejected": -2.668549060821533, + "logps/chosen": -469.8207092285156, + "logps/rejected": -427.02337646484375, + "loss": 0.5785, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.8234984278678894, + "rewards/margins": 0.3786749541759491, + "rewards/rejected": -1.2021734714508057, + "step": 3060 + }, + { + "epoch": 0.4, + "eval_logits/chosen": -2.5429956912994385, + "eval_logits/rejected": -2.5619399547576904, + "eval_logps/chosen": -421.4232482910156, + "eval_logps/rejected": -417.9524230957031, + "eval_loss": 0.6219341158866882, + "eval_rewards/accuracies": 0.6504999995231628, + "eval_rewards/chosen": -0.8867385983467102, + "eval_rewards/margins": 0.28168606758117676, + "eval_rewards/rejected": -1.1684246063232422, + "eval_runtime": 196.8459, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 3060 + }, + { + "epoch": 0.4, + "learning_rate": 3.7371184104692857e-06, + "logits/chosen": -2.8001978397369385, + "logits/rejected": -2.784719944000244, + "logps/chosen": -487.9359436035156, + "logps/rejected": -442.556396484375, + "loss": 0.6048, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.8620021939277649, + "rewards/margins": 0.3240812420845032, + "rewards/rejected": -1.186083436012268, + "step": 3070 + }, + { + "epoch": 0.4, + "eval_logits/chosen": -2.540199041366577, + "eval_logits/rejected": -2.5592684745788574, + "eval_logps/chosen": -427.2795715332031, + "eval_logps/rejected": -424.89849853515625, + "eval_loss": 0.621972918510437, + "eval_rewards/accuracies": 0.6510000228881836, + "eval_rewards/chosen": -0.9453017115592957, + "eval_rewards/margins": 0.2925828993320465, + "eval_rewards/rejected": -1.2378844022750854, + "eval_runtime": 196.8413, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 3070 + }, + { + "epoch": 0.4, + "learning_rate": 3.727179773011289e-06, + "logits/chosen": -2.624542713165283, + "logits/rejected": -2.65124773979187, + "logps/chosen": -452.1876525878906, + "logps/rejected": -441.6802673339844, + "loss": 0.6668, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.0772656202316284, + "rewards/margins": 0.20561933517456055, + "rewards/rejected": -1.2828850746154785, + "step": 3080 + }, + { + "epoch": 0.4, + "eval_logits/chosen": -2.541574001312256, + "eval_logits/rejected": -2.5601866245269775, + "eval_logps/chosen": -436.0604248046875, + "eval_logps/rejected": -434.9568786621094, + "eval_loss": 0.6212599873542786, + "eval_rewards/accuracies": 0.6510000228881836, + "eval_rewards/chosen": -1.0331101417541504, + "eval_rewards/margins": 0.3053584396839142, + "eval_rewards/rejected": -1.3384685516357422, + "eval_runtime": 196.6065, + "eval_samples_per_second": 10.173, + "eval_steps_per_second": 5.086, + "step": 3080 + }, + { + "epoch": 0.4, + "learning_rate": 3.717215518130127e-06, + "logits/chosen": -2.5789878368377686, + "logits/rejected": -2.554394006729126, + "logps/chosen": -426.98602294921875, + "logps/rejected": -426.3089294433594, + "loss": 0.6687, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.1472880840301514, + "rewards/margins": 0.18235152959823608, + "rewards/rejected": -1.3296396732330322, + "step": 3090 + }, + { + "epoch": 0.4, + "eval_logits/chosen": -2.5425422191619873, + "eval_logits/rejected": -2.560896873474121, + "eval_logps/chosen": -438.49700927734375, + "eval_logps/rejected": -437.4830322265625, + "eval_loss": 0.620273768901825, + "eval_rewards/accuracies": 0.6524999737739563, + "eval_rewards/chosen": -1.0574753284454346, + "eval_rewards/margins": 0.3062548339366913, + "eval_rewards/rejected": -1.3637299537658691, + "eval_runtime": 196.7941, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 5.081, + "step": 3090 + }, + { + "epoch": 0.41, + "learning_rate": 3.7072258538299923e-06, + "logits/chosen": -2.782985210418701, + "logits/rejected": -2.7424604892730713, + "logps/chosen": -529.3215942382812, + "logps/rejected": -452.83001708984375, + "loss": 0.5243, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.0583951473236084, + "rewards/margins": 0.495781272649765, + "rewards/rejected": -1.5541764497756958, + "step": 3100 + }, + { + "epoch": 0.41, + "eval_logits/chosen": -2.5389695167541504, + "eval_logits/rejected": -2.5574052333831787, + "eval_logps/chosen": -441.1709289550781, + "eval_logps/rejected": -440.71368408203125, + "eval_loss": 0.6207247376441956, + "eval_rewards/accuracies": 0.6514999866485596, + "eval_rewards/chosen": -1.0842152833938599, + "eval_rewards/margins": 0.31182152032852173, + "eval_rewards/rejected": -1.396036982536316, + "eval_runtime": 196.8853, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 3100 + }, + { + "epoch": 0.41, + "learning_rate": 3.6972109886454933e-06, + "logits/chosen": -2.6880383491516113, + "logits/rejected": -2.7003605365753174, + "logps/chosen": -433.5580139160156, + "logps/rejected": -437.60015869140625, + "loss": 0.6149, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.1408096551895142, + "rewards/margins": 0.37903618812561035, + "rewards/rejected": -1.519845962524414, + "step": 3110 + }, + { + "epoch": 0.41, + "eval_logits/chosen": -2.533323287963867, + "eval_logits/rejected": -2.5518412590026855, + "eval_logps/chosen": -440.2718811035156, + "eval_logps/rejected": -439.7763366699219, + "eval_loss": 0.6214109063148499, + "eval_rewards/accuracies": 0.6524999737739563, + "eval_rewards/chosen": -1.0752249956130981, + "eval_rewards/margins": 0.3114384114742279, + "eval_rewards/rejected": -1.3866634368896484, + "eval_runtime": 197.1731, + "eval_samples_per_second": 10.143, + "eval_steps_per_second": 5.072, + "step": 3110 + }, + { + "epoch": 0.41, + "learning_rate": 3.687171131637314e-06, + "logits/chosen": -2.551008939743042, + "logits/rejected": -2.5964572429656982, + "logps/chosen": -450.5162658691406, + "logps/rejected": -437.95501708984375, + "loss": 0.6346, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.066068172454834, + "rewards/margins": 0.3243589699268341, + "rewards/rejected": -1.3904269933700562, + "step": 3120 + }, + { + "epoch": 0.41, + "eval_logits/chosen": -2.5467216968536377, + "eval_logits/rejected": -2.5647366046905518, + "eval_logps/chosen": -437.42950439453125, + "eval_logps/rejected": -436.2012634277344, + "eval_loss": 0.6199201941490173, + "eval_rewards/accuracies": 0.6499999761581421, + "eval_rewards/chosen": -1.0468007326126099, + "eval_rewards/margins": 0.3041113018989563, + "eval_rewards/rejected": -1.350912094116211, + "eval_runtime": 196.9923, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.076, + "step": 3120 + }, + { + "epoch": 0.41, + "learning_rate": 3.677106492387839e-06, + "logits/chosen": -2.7704672813415527, + "logits/rejected": -2.72668719291687, + "logps/chosen": -449.903564453125, + "logps/rejected": -406.24029541015625, + "loss": 0.637, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.063520908355713, + "rewards/margins": 0.250002920627594, + "rewards/rejected": -1.3135238885879517, + "step": 3130 + }, + { + "epoch": 0.41, + "eval_logits/chosen": -2.5496647357940674, + "eval_logits/rejected": -2.567011833190918, + "eval_logps/chosen": -441.9938049316406, + "eval_logps/rejected": -440.9230651855469, + "eval_loss": 0.6196883320808411, + "eval_rewards/accuracies": 0.6495000123977661, + "eval_rewards/chosen": -1.0924441814422607, + "eval_rewards/margins": 0.3056861162185669, + "eval_rewards/rejected": -1.398130178451538, + "eval_runtime": 197.0452, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 3130 + }, + { + "epoch": 0.41, + "learning_rate": 3.6670172809967865e-06, + "logits/chosen": -2.605725049972534, + "logits/rejected": -2.5953238010406494, + "logps/chosen": -384.080078125, + "logps/rejected": -388.01873779296875, + "loss": 0.6076, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.2000583410263062, + "rewards/margins": 0.28716519474983215, + "rewards/rejected": -1.4872233867645264, + "step": 3140 + }, + { + "epoch": 0.41, + "eval_logits/chosen": -2.545055866241455, + "eval_logits/rejected": -2.561589002609253, + "eval_logps/chosen": -448.6625671386719, + "eval_logps/rejected": -448.001708984375, + "eval_loss": 0.620514988899231, + "eval_rewards/accuracies": 0.6514999866485596, + "eval_rewards/chosen": -1.159131646156311, + "eval_rewards/margins": 0.3097854554653168, + "eval_rewards/rejected": -1.4689171314239502, + "eval_runtime": 196.9221, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 3140 + }, + { + "epoch": 0.41, + "learning_rate": 3.6569037080768153e-06, + "logits/chosen": -2.7470648288726807, + "logits/rejected": -2.7412030696868896, + "logps/chosen": -421.9847106933594, + "logps/rejected": -458.28314208984375, + "loss": 0.5972, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.1772805452346802, + "rewards/margins": 0.35452547669410706, + "rewards/rejected": -1.5318059921264648, + "step": 3150 + }, + { + "epoch": 0.41, + "eval_logits/chosen": -2.5433573722839355, + "eval_logits/rejected": -2.5591485500335693, + "eval_logps/chosen": -454.7508850097656, + "eval_logps/rejected": -454.5576477050781, + "eval_loss": 0.6221292018890381, + "eval_rewards/accuracies": 0.6514999866485596, + "eval_rewards/chosen": -1.2200146913528442, + "eval_rewards/margins": 0.3144617974758148, + "eval_rewards/rejected": -1.534476399421692, + "eval_runtime": 196.7531, + "eval_samples_per_second": 10.165, + "eval_steps_per_second": 5.083, + "step": 3150 + }, + { + "epoch": 0.41, + "learning_rate": 3.646765984749137e-06, + "logits/chosen": -2.7149806022644043, + "logits/rejected": -2.761202096939087, + "logps/chosen": -453.30419921875, + "logps/rejected": -484.2044982910156, + "loss": 0.6125, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.237396240234375, + "rewards/margins": 0.3510487675666809, + "rewards/rejected": -1.5884450674057007, + "step": 3160 + }, + { + "epoch": 0.41, + "eval_logits/chosen": -2.5344886779785156, + "eval_logits/rejected": -2.5501887798309326, + "eval_logps/chosen": -454.9515686035156, + "eval_logps/rejected": -454.9926452636719, + "eval_loss": 0.6223093271255493, + "eval_rewards/accuracies": 0.6514999866485596, + "eval_rewards/chosen": -1.2220218181610107, + "eval_rewards/margins": 0.3168042004108429, + "eval_rewards/rejected": -1.5388261079788208, + "eval_runtime": 197.0041, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 3160 + }, + { + "epoch": 0.41, + "learning_rate": 3.6366043226391e-06, + "logits/chosen": -2.574552536010742, + "logits/rejected": -2.5905323028564453, + "logps/chosen": -460.30157470703125, + "logps/rejected": -439.5680236816406, + "loss": 0.5862, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.1437591314315796, + "rewards/margins": 0.36615657806396484, + "rewards/rejected": -1.5099157094955444, + "step": 3170 + }, + { + "epoch": 0.41, + "eval_logits/chosen": -2.5259342193603516, + "eval_logits/rejected": -2.541714906692505, + "eval_logps/chosen": -454.07659912109375, + "eval_logps/rejected": -454.0869140625, + "eval_loss": 0.6219574809074402, + "eval_rewards/accuracies": 0.6510000228881836, + "eval_rewards/chosen": -1.2132717370986938, + "eval_rewards/margins": 0.3164973556995392, + "eval_rewards/rejected": -1.5297691822052002, + "eval_runtime": 196.8455, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 3170 + }, + { + "epoch": 0.42, + "learning_rate": 3.6264189338717766e-06, + "logits/chosen": -2.816720962524414, + "logits/rejected": -2.7663235664367676, + "logps/chosen": -458.0995178222656, + "logps/rejected": -445.95892333984375, + "loss": 0.6327, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.203072190284729, + "rewards/margins": 0.29866084456443787, + "rewards/rejected": -1.5017330646514893, + "step": 3180 + }, + { + "epoch": 0.42, + "eval_logits/chosen": -2.5217185020446777, + "eval_logits/rejected": -2.538130521774292, + "eval_logps/chosen": -442.9092102050781, + "eval_logps/rejected": -442.09063720703125, + "eval_loss": 0.6193828582763672, + "eval_rewards/accuracies": 0.6554999947547913, + "eval_rewards/chosen": -1.1015980243682861, + "eval_rewards/margins": 0.30820852518081665, + "eval_rewards/rejected": -1.409806489944458, + "eval_runtime": 196.9243, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 3180 + }, + { + "epoch": 0.42, + "learning_rate": 3.6162100310675334e-06, + "logits/chosen": -2.7207634449005127, + "logits/rejected": -2.713285446166992, + "logps/chosen": -447.8941345214844, + "logps/rejected": -440.34698486328125, + "loss": 0.6965, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -1.179276466369629, + "rewards/margins": 0.14491409063339233, + "rewards/rejected": -1.324190616607666, + "step": 3190 + }, + { + "epoch": 0.42, + "eval_logits/chosen": -2.523361921310425, + "eval_logits/rejected": -2.5399725437164307, + "eval_logps/chosen": -432.1943664550781, + "eval_logps/rejected": -430.0140686035156, + "eval_loss": 0.6185163259506226, + "eval_rewards/accuracies": 0.6524999737739563, + "eval_rewards/chosen": -0.9944491982460022, + "eval_rewards/margins": 0.2945913076400757, + "eval_rewards/rejected": -1.2890405654907227, + "eval_runtime": 196.8913, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 3190 + }, + { + "epoch": 0.42, + "learning_rate": 3.605977827337596e-06, + "logits/chosen": -2.6888821125030518, + "logits/rejected": -2.666752338409424, + "logps/chosen": -414.6795349121094, + "logps/rejected": -418.9081115722656, + "loss": 0.6283, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.9674969911575317, + "rewards/margins": 0.2940976917743683, + "rewards/rejected": -1.261594533920288, + "step": 3200 + }, + { + "epoch": 0.42, + "eval_logits/chosen": -2.5268585681915283, + "eval_logits/rejected": -2.5438408851623535, + "eval_logps/chosen": -426.7724304199219, + "eval_logps/rejected": -423.63458251953125, + "eval_loss": 0.6185536980628967, + "eval_rewards/accuracies": 0.6575000286102295, + "eval_rewards/chosen": -0.9402304887771606, + "eval_rewards/margins": 0.28501537442207336, + "eval_rewards/rejected": -1.2252458333969116, + "eval_runtime": 196.6028, + "eval_samples_per_second": 10.173, + "eval_steps_per_second": 5.086, + "step": 3200 + }, + { + "epoch": 0.42, + "learning_rate": 3.595722536279595e-06, + "logits/chosen": -2.791154146194458, + "logits/rejected": -2.726059913635254, + "logps/chosen": -487.59375, + "logps/rejected": -433.30413818359375, + "loss": 0.5662, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.8068972826004028, + "rewards/margins": 0.4043118357658386, + "rewards/rejected": -1.2112090587615967, + "step": 3210 + }, + { + "epoch": 0.42, + "eval_logits/chosen": -2.523442268371582, + "eval_logits/rejected": -2.5411760807037354, + "eval_logps/chosen": -419.7383728027344, + "eval_logps/rejected": -415.713134765625, + "eval_loss": 0.6193069815635681, + "eval_rewards/accuracies": 0.6575000286102295, + "eval_rewards/chosen": -0.8698898553848267, + "eval_rewards/margins": 0.27614113688468933, + "eval_rewards/rejected": -1.1460310220718384, + "eval_runtime": 197.011, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 3210 + }, + { + "epoch": 0.42, + "learning_rate": 3.58544437197311e-06, + "logits/chosen": -2.6719181537628174, + "logits/rejected": -2.6700119972229004, + "logps/chosen": -420.42791748046875, + "logps/rejected": -409.15576171875, + "loss": 0.6209, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.7765008807182312, + "rewards/margins": 0.2826058268547058, + "rewards/rejected": -1.059106707572937, + "step": 3220 + }, + { + "epoch": 0.42, + "eval_logits/chosen": -2.522939443588257, + "eval_logits/rejected": -2.541029691696167, + "eval_logps/chosen": -415.0108947753906, + "eval_logps/rejected": -410.4877014160156, + "eval_loss": 0.6199746131896973, + "eval_rewards/accuracies": 0.6589999794960022, + "eval_rewards/chosen": -0.8226147890090942, + "eval_rewards/margins": 0.27116167545318604, + "eval_rewards/rejected": -1.0937764644622803, + "eval_runtime": 197.4604, + "eval_samples_per_second": 10.129, + "eval_steps_per_second": 5.064, + "step": 3220 + }, + { + "epoch": 0.42, + "learning_rate": 3.5751435489752025e-06, + "logits/chosen": -2.658782958984375, + "logits/rejected": -2.6702182292938232, + "logps/chosen": -390.2605895996094, + "logps/rejected": -382.9984436035156, + "loss": 0.6044, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.7846366763114929, + "rewards/margins": 0.31734299659729004, + "rewards/rejected": -1.1019797325134277, + "step": 3230 + }, + { + "epoch": 0.42, + "eval_logits/chosen": -2.519177198410034, + "eval_logits/rejected": -2.537327289581299, + "eval_logps/chosen": -416.0750732421875, + "eval_logps/rejected": -412.0194396972656, + "eval_loss": 0.6196084022521973, + "eval_rewards/accuracies": 0.6575000286102295, + "eval_rewards/chosen": -0.8332566618919373, + "eval_rewards/margins": 0.2758375108242035, + "eval_rewards/rejected": -1.1090940237045288, + "eval_runtime": 196.9088, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.078, + "step": 3230 + }, + { + "epoch": 0.42, + "learning_rate": 3.5648202823159317e-06, + "logits/chosen": -2.649294137954712, + "logits/rejected": -2.7054905891418457, + "logps/chosen": -371.8926086425781, + "logps/rejected": -439.46844482421875, + "loss": 0.5752, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.7728925943374634, + "rewards/margins": 0.3550662100315094, + "rewards/rejected": -1.12795889377594, + "step": 3240 + }, + { + "epoch": 0.42, + "eval_logits/chosen": -2.492098569869995, + "eval_logits/rejected": -2.510578155517578, + "eval_logps/chosen": -423.4895935058594, + "eval_logps/rejected": -421.0146789550781, + "eval_loss": 0.6204763054847717, + "eval_rewards/accuracies": 0.6549999713897705, + "eval_rewards/chosen": -0.9074018001556396, + "eval_rewards/margins": 0.29164472222328186, + "eval_rewards/rejected": -1.1990464925765991, + "eval_runtime": 197.1081, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.073, + "step": 3240 + }, + { + "epoch": 0.43, + "learning_rate": 3.554474787493873e-06, + "logits/chosen": -2.5724246501922607, + "logits/rejected": -2.557253360748291, + "logps/chosen": -461.4246520996094, + "logps/rejected": -445.2750549316406, + "loss": 0.624, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.9212248921394348, + "rewards/margins": 0.33202242851257324, + "rewards/rejected": -1.2532472610473633, + "step": 3250 + }, + { + "epoch": 0.43, + "eval_logits/chosen": -2.46036958694458, + "eval_logits/rejected": -2.4790313243865967, + "eval_logps/chosen": -429.57958984375, + "eval_logps/rejected": -428.1665344238281, + "eval_loss": 0.6233686208724976, + "eval_rewards/accuracies": 0.6520000100135803, + "eval_rewards/chosen": -0.9683018922805786, + "eval_rewards/margins": 0.3022630512714386, + "eval_rewards/rejected": -1.2705649137496948, + "eval_runtime": 196.8966, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 3250 + }, + { + "epoch": 0.43, + "learning_rate": 3.5441072804716125e-06, + "logits/chosen": -2.6319198608398438, + "logits/rejected": -2.6219584941864014, + "logps/chosen": -468.0224609375, + "logps/rejected": -497.96771240234375, + "loss": 0.5992, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.9127866625785828, + "rewards/margins": 0.37219464778900146, + "rewards/rejected": -1.2849812507629395, + "step": 3260 + }, + { + "epoch": 0.43, + "eval_logits/chosen": -2.4434595108032227, + "eval_logits/rejected": -2.4621036052703857, + "eval_logps/chosen": -435.7987976074219, + "eval_logps/rejected": -435.3685302734375, + "eval_loss": 0.6249234676361084, + "eval_rewards/accuracies": 0.6510000228881836, + "eval_rewards/chosen": -1.030493974685669, + "eval_rewards/margins": 0.3120914101600647, + "eval_rewards/rejected": -1.3425853252410889, + "eval_runtime": 197.064, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.074, + "step": 3260 + }, + { + "epoch": 0.43, + "learning_rate": 3.5337179776712427e-06, + "logits/chosen": -2.5710291862487793, + "logits/rejected": -2.5899360179901123, + "logps/chosen": -414.93377685546875, + "logps/rejected": -444.3470764160156, + "loss": 0.5932, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.9402750134468079, + "rewards/margins": 0.4441652297973633, + "rewards/rejected": -1.3844401836395264, + "step": 3270 + }, + { + "epoch": 0.43, + "eval_logits/chosen": -2.4319798946380615, + "eval_logits/rejected": -2.450648784637451, + "eval_logps/chosen": -440.21142578125, + "eval_logps/rejected": -440.3194885253906, + "eval_loss": 0.6256486773490906, + "eval_rewards/accuracies": 0.6489999890327454, + "eval_rewards/chosen": -1.074620246887207, + "eval_rewards/margins": 0.3174746036529541, + "eval_rewards/rejected": -1.3920949697494507, + "eval_runtime": 197.099, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.074, + "step": 3270 + }, + { + "epoch": 0.43, + "learning_rate": 3.5233070959698445e-06, + "logits/chosen": -2.6314568519592285, + "logits/rejected": -2.6279854774475098, + "logps/chosen": -482.607177734375, + "logps/rejected": -463.41656494140625, + "loss": 0.6325, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.0531612634658813, + "rewards/margins": 0.2862653136253357, + "rewards/rejected": -1.3394266366958618, + "step": 3280 + }, + { + "epoch": 0.43, + "eval_logits/chosen": -2.4338669776916504, + "eval_logits/rejected": -2.452253818511963, + "eval_logps/chosen": -442.3782043457031, + "eval_logps/rejected": -442.60601806640625, + "eval_loss": 0.6250951886177063, + "eval_rewards/accuracies": 0.6495000123977661, + "eval_rewards/chosen": -1.0962878465652466, + "eval_rewards/margins": 0.31867215037345886, + "eval_rewards/rejected": -1.4149600267410278, + "eval_runtime": 197.2018, + "eval_samples_per_second": 10.142, + "eval_steps_per_second": 5.071, + "step": 3280 + }, + { + "epoch": 0.43, + "learning_rate": 3.512874852694959e-06, + "logits/chosen": -2.5505518913269043, + "logits/rejected": -2.5124411582946777, + "logps/chosen": -438.30010986328125, + "logps/rejected": -424.20489501953125, + "loss": 0.6279, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.0510666370391846, + "rewards/margins": 0.2773689329624176, + "rewards/rejected": -1.3284354209899902, + "step": 3290 + }, + { + "epoch": 0.43, + "eval_logits/chosen": -2.4407126903533936, + "eval_logits/rejected": -2.458707332611084, + "eval_logps/chosen": -442.939697265625, + "eval_logps/rejected": -443.0581359863281, + "eval_loss": 0.6231091022491455, + "eval_rewards/accuracies": 0.6504999995231628, + "eval_rewards/chosen": -1.1019030809402466, + "eval_rewards/margins": 0.31757813692092896, + "eval_rewards/rejected": -1.4194810390472412, + "eval_runtime": 197.1085, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.073, + "step": 3290 + }, + { + "epoch": 0.43, + "learning_rate": 3.5024214656200497e-06, + "logits/chosen": -2.583045482635498, + "logits/rejected": -2.540410280227661, + "logps/chosen": -454.31658935546875, + "logps/rejected": -416.6260681152344, + "loss": 0.6383, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.0384652614593506, + "rewards/margins": 0.29643210768699646, + "rewards/rejected": -1.3348972797393799, + "step": 3300 + }, + { + "epoch": 0.43, + "eval_logits/chosen": -2.449859619140625, + "eval_logits/rejected": -2.4677672386169434, + "eval_logps/chosen": -437.662353515625, + "eval_logps/rejected": -436.8260192871094, + "eval_loss": 0.6216550469398499, + "eval_rewards/accuracies": 0.6545000076293945, + "eval_rewards/chosen": -1.0491294860839844, + "eval_rewards/margins": 0.30803078413009644, + "eval_rewards/rejected": -1.3571603298187256, + "eval_runtime": 196.8605, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.08, + "step": 3300 + }, + { + "epoch": 0.43, + "learning_rate": 3.491947152959958e-06, + "logits/chosen": -2.714921474456787, + "logits/rejected": -2.687603712081909, + "logps/chosen": -469.598388671875, + "logps/rejected": -471.0265197753906, + "loss": 0.6163, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.0226032733917236, + "rewards/margins": 0.3151467442512512, + "rewards/rejected": -1.33774995803833, + "step": 3310 + }, + { + "epoch": 0.43, + "eval_logits/chosen": -2.4600846767425537, + "eval_logits/rejected": -2.477358102798462, + "eval_logps/chosen": -431.9792785644531, + "eval_logps/rejected": -430.2066955566406, + "eval_loss": 0.6200381517410278, + "eval_rewards/accuracies": 0.656000018119812, + "eval_rewards/chosen": -0.9922983646392822, + "eval_rewards/margins": 0.29866811633110046, + "eval_rewards/rejected": -1.290966272354126, + "eval_runtime": 196.7653, + "eval_samples_per_second": 10.164, + "eval_steps_per_second": 5.082, + "step": 3310 + }, + { + "epoch": 0.43, + "learning_rate": 3.4814521333663497e-06, + "logits/chosen": -2.716564178466797, + "logits/rejected": -2.686750888824463, + "logps/chosen": -496.6659240722656, + "logps/rejected": -436.3720703125, + "loss": 0.6353, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.007828950881958, + "rewards/margins": 0.29953330755233765, + "rewards/rejected": -1.3073623180389404, + "step": 3320 + }, + { + "epoch": 0.43, + "eval_logits/chosen": -2.4711954593658447, + "eval_logits/rejected": -2.48844051361084, + "eval_logps/chosen": -424.7696228027344, + "eval_logps/rejected": -421.8024597167969, + "eval_loss": 0.6190692186355591, + "eval_rewards/accuracies": 0.6570000052452087, + "eval_rewards/chosen": -0.9202021360397339, + "eval_rewards/margins": 0.28672224283218384, + "eval_rewards/rejected": -1.2069244384765625, + "eval_runtime": 196.9176, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.078, + "step": 3320 + }, + { + "epoch": 0.44, + "learning_rate": 3.4709366259231468e-06, + "logits/chosen": -2.5870168209075928, + "logits/rejected": -2.589010238647461, + "logps/chosen": -464.08984375, + "logps/rejected": -429.16668701171875, + "loss": 0.6372, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.9650132060050964, + "rewards/margins": 0.26106229424476624, + "rewards/rejected": -1.226075530052185, + "step": 3330 + }, + { + "epoch": 0.44, + "eval_logits/chosen": -2.474238395690918, + "eval_logits/rejected": -2.491607666015625, + "eval_logps/chosen": -422.61328125, + "eval_logps/rejected": -419.35601806640625, + "eval_loss": 0.6182964444160461, + "eval_rewards/accuracies": 0.6575000286102295, + "eval_rewards/chosen": -0.8986384272575378, + "eval_rewards/margins": 0.2838219702243805, + "eval_rewards/rejected": -1.1824604272842407, + "eval_runtime": 197.1115, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.073, + "step": 3330 + }, + { + "epoch": 0.44, + "learning_rate": 3.460400850141956e-06, + "logits/chosen": -2.6380228996276855, + "logits/rejected": -2.552403688430786, + "logps/chosen": -395.2093811035156, + "logps/rejected": -399.08367919921875, + "loss": 0.604, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.9930335283279419, + "rewards/margins": 0.3438864052295685, + "rewards/rejected": -1.3369200229644775, + "step": 3340 + }, + { + "epoch": 0.44, + "eval_logits/chosen": -2.4701356887817383, + "eval_logits/rejected": -2.487643003463745, + "eval_logps/chosen": -425.9539489746094, + "eval_logps/rejected": -423.4430236816406, + "eval_loss": 0.6179810166358948, + "eval_rewards/accuracies": 0.6575000286102295, + "eval_rewards/chosen": -0.9320449233055115, + "eval_rewards/margins": 0.2912852168083191, + "eval_rewards/rejected": -1.2233302593231201, + "eval_runtime": 196.7212, + "eval_samples_per_second": 10.167, + "eval_steps_per_second": 5.083, + "step": 3340 + }, + { + "epoch": 0.44, + "learning_rate": 3.4498450259574858e-06, + "logits/chosen": -2.6182613372802734, + "logits/rejected": -2.616330623626709, + "logps/chosen": -425.41436767578125, + "logps/rejected": -424.77557373046875, + "loss": 0.6338, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.9113836288452148, + "rewards/margins": 0.2506571114063263, + "rewards/rejected": -1.1620408296585083, + "step": 3350 + }, + { + "epoch": 0.44, + "eval_logits/chosen": -2.476341962814331, + "eval_logits/rejected": -2.4934473037719727, + "eval_logps/chosen": -433.4237976074219, + "eval_logps/rejected": -432.0135803222656, + "eval_loss": 0.6166380643844604, + "eval_rewards/accuracies": 0.6570000052452087, + "eval_rewards/chosen": -1.0067439079284668, + "eval_rewards/margins": 0.3022918105125427, + "eval_rewards/rejected": -1.3090356588363647, + "eval_runtime": 197.0297, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 3350 + }, + { + "epoch": 0.44, + "learning_rate": 3.439269373722957e-06, + "logits/chosen": -2.5579094886779785, + "logits/rejected": -2.568756580352783, + "logps/chosen": -428.636962890625, + "logps/rejected": -421.09466552734375, + "loss": 0.6361, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.061601996421814, + "rewards/margins": 0.2855250835418701, + "rewards/rejected": -1.3471271991729736, + "step": 3360 + }, + { + "epoch": 0.44, + "eval_logits/chosen": -2.4759557247161865, + "eval_logits/rejected": -2.4932050704956055, + "eval_logps/chosen": -434.44158935546875, + "eval_logps/rejected": -433.1445007324219, + "eval_loss": 0.6161326169967651, + "eval_rewards/accuracies": 0.6585000157356262, + "eval_rewards/chosen": -1.016922116279602, + "eval_rewards/margins": 0.3034227192401886, + "eval_rewards/rejected": -1.3203449249267578, + "eval_runtime": 197.0594, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 3360 + }, + { + "epoch": 0.44, + "learning_rate": 3.4286741142055014e-06, + "logits/chosen": -2.6796391010284424, + "logits/rejected": -2.6622538566589355, + "logps/chosen": -454.41412353515625, + "logps/rejected": -435.6158142089844, + "loss": 0.6455, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.9890263676643372, + "rewards/margins": 0.2296716719865799, + "rewards/rejected": -1.2186981439590454, + "step": 3370 + }, + { + "epoch": 0.44, + "eval_logits/chosen": -2.4797682762145996, + "eval_logits/rejected": -2.4973316192626953, + "eval_logps/chosen": -430.6171875, + "eval_logps/rejected": -428.8773193359375, + "eval_loss": 0.6161298751831055, + "eval_rewards/accuracies": 0.6629999876022339, + "eval_rewards/chosen": -0.9786774516105652, + "eval_rewards/margins": 0.29899558424949646, + "eval_rewards/rejected": -1.2776730060577393, + "eval_runtime": 196.995, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.076, + "step": 3370 + }, + { + "epoch": 0.44, + "learning_rate": 3.4180594685815536e-06, + "logits/chosen": -2.670607328414917, + "logits/rejected": -2.6860036849975586, + "logps/chosen": -394.56951904296875, + "logps/rejected": -408.80474853515625, + "loss": 0.6137, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.0444326400756836, + "rewards/margins": 0.3033692240715027, + "rewards/rejected": -1.347801923751831, + "step": 3380 + }, + { + "epoch": 0.44, + "eval_logits/chosen": -2.476987838745117, + "eval_logits/rejected": -2.4944095611572266, + "eval_logps/chosen": -431.58233642578125, + "eval_logps/rejected": -429.9381103515625, + "eval_loss": 0.6160823702812195, + "eval_rewards/accuracies": 0.6625000238418579, + "eval_rewards/chosen": -0.9883295893669128, + "eval_rewards/margins": 0.29995113611221313, + "eval_rewards/rejected": -1.2882806062698364, + "eval_runtime": 196.9616, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 3380 + }, + { + "epoch": 0.44, + "learning_rate": 3.4074256584322336e-06, + "logits/chosen": -2.5886781215667725, + "logits/rejected": -2.577141046524048, + "logps/chosen": -398.1769104003906, + "logps/rejected": -392.4770202636719, + "loss": 0.6181, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.9637646675109863, + "rewards/margins": 0.29393166303634644, + "rewards/rejected": -1.2576963901519775, + "step": 3390 + }, + { + "epoch": 0.44, + "eval_logits/chosen": -2.4677271842956543, + "eval_logits/rejected": -2.484666347503662, + "eval_logps/chosen": -433.7590026855469, + "eval_logps/rejected": -432.53369140625, + "eval_loss": 0.6153517365455627, + "eval_rewards/accuracies": 0.6604999899864197, + "eval_rewards/chosen": -1.0100959539413452, + "eval_rewards/margins": 0.304141104221344, + "eval_rewards/rejected": -1.3142372369766235, + "eval_runtime": 197.0278, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 3390 + }, + { + "epoch": 0.44, + "learning_rate": 3.3967729057387213e-06, + "logits/chosen": -2.595198392868042, + "logits/rejected": -2.5745913982391357, + "logps/chosen": -458.33251953125, + "logps/rejected": -429.78466796875, + "loss": 0.6161, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.9517822265625, + "rewards/margins": 0.2866813540458679, + "rewards/rejected": -1.2384636402130127, + "step": 3400 + }, + { + "epoch": 0.44, + "eval_logits/chosen": -2.4600415229797363, + "eval_logits/rejected": -2.4762325286865234, + "eval_logps/chosen": -438.2619934082031, + "eval_logps/rejected": -437.5621337890625, + "eval_loss": 0.6144526600837708, + "eval_rewards/accuracies": 0.6610000133514404, + "eval_rewards/chosen": -1.0551260709762573, + "eval_rewards/margins": 0.3093947768211365, + "eval_rewards/rejected": -1.3645209074020386, + "eval_runtime": 196.8631, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.08, + "step": 3400 + }, + { + "epoch": 0.45, + "learning_rate": 3.386101432877624e-06, + "logits/chosen": -2.6997172832489014, + "logits/rejected": -2.6695003509521484, + "logps/chosen": -441.0243225097656, + "logps/rejected": -419.212158203125, + "loss": 0.6071, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.0293080806732178, + "rewards/margins": 0.3362095057964325, + "rewards/rejected": -1.3655176162719727, + "step": 3410 + }, + { + "epoch": 0.45, + "eval_logits/chosen": -2.4570953845977783, + "eval_logits/rejected": -2.473388671875, + "eval_logps/chosen": -440.8621520996094, + "eval_logps/rejected": -440.6937561035156, + "eval_loss": 0.6143715977668762, + "eval_rewards/accuracies": 0.6589999794960022, + "eval_rewards/chosen": -1.081127643585205, + "eval_rewards/margins": 0.31470969319343567, + "eval_rewards/rejected": -1.3958373069763184, + "eval_runtime": 196.9586, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 3410 + }, + { + "epoch": 0.45, + "learning_rate": 3.375411462616332e-06, + "logits/chosen": -2.6679186820983887, + "logits/rejected": -2.6668734550476074, + "logps/chosen": -458.6727600097656, + "logps/rejected": -488.9190979003906, + "loss": 0.5929, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.086240291595459, + "rewards/margins": 0.3417370915412903, + "rewards/rejected": -1.4279773235321045, + "step": 3420 + }, + { + "epoch": 0.45, + "eval_logits/chosen": -2.4514076709747314, + "eval_logits/rejected": -2.4678046703338623, + "eval_logps/chosen": -444.2358703613281, + "eval_logps/rejected": -444.6484680175781, + "eval_loss": 0.6145649552345276, + "eval_rewards/accuracies": 0.656499981880188, + "eval_rewards/chosen": -1.114864468574524, + "eval_rewards/margins": 0.3205198347568512, + "eval_rewards/rejected": -1.4353843927383423, + "eval_runtime": 196.9793, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 3420 + }, + { + "epoch": 0.45, + "learning_rate": 3.3647032181083696e-06, + "logits/chosen": -2.7156121730804443, + "logits/rejected": -2.707794666290283, + "logps/chosen": -506.02716064453125, + "logps/rejected": -497.256103515625, + "loss": 0.6345, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.1724050045013428, + "rewards/margins": 0.2621404528617859, + "rewards/rejected": -1.4345453977584839, + "step": 3430 + }, + { + "epoch": 0.45, + "eval_logits/chosen": -2.4483513832092285, + "eval_logits/rejected": -2.464862108230591, + "eval_logps/chosen": -444.25457763671875, + "eval_logps/rejected": -444.73095703125, + "eval_loss": 0.6144143342971802, + "eval_rewards/accuracies": 0.659500002861023, + "eval_rewards/chosen": -1.1150517463684082, + "eval_rewards/margins": 0.32115766406059265, + "eval_rewards/rejected": -1.4362094402313232, + "eval_runtime": 196.6976, + "eval_samples_per_second": 10.168, + "eval_steps_per_second": 5.084, + "step": 3430 + }, + { + "epoch": 0.45, + "learning_rate": 3.3539769228887382e-06, + "logits/chosen": -2.6738858222961426, + "logits/rejected": -2.6460211277008057, + "logps/chosen": -491.38385009765625, + "logps/rejected": -500.1153259277344, + "loss": 0.5878, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.0534520149230957, + "rewards/margins": 0.3450348377227783, + "rewards/rejected": -1.398486852645874, + "step": 3440 + }, + { + "epoch": 0.45, + "eval_logits/chosen": -2.4468305110931396, + "eval_logits/rejected": -2.463901996612549, + "eval_logps/chosen": -441.3050537109375, + "eval_logps/rejected": -441.7398986816406, + "eval_loss": 0.6141930222511292, + "eval_rewards/accuracies": 0.6600000262260437, + "eval_rewards/chosen": -1.0855563879013062, + "eval_rewards/margins": 0.32074230909347534, + "eval_rewards/rejected": -1.4062987565994263, + "eval_runtime": 197.0856, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 3440 + }, + { + "epoch": 0.45, + "learning_rate": 3.343232800869247e-06, + "logits/chosen": -2.6060128211975098, + "logits/rejected": -2.615265369415283, + "logps/chosen": -398.96343994140625, + "logps/rejected": -360.34259033203125, + "loss": 0.6214, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.0496917963027954, + "rewards/margins": 0.25588348507881165, + "rewards/rejected": -1.3055751323699951, + "step": 3450 + }, + { + "epoch": 0.45, + "eval_logits/chosen": -2.4470934867858887, + "eval_logits/rejected": -2.4643971920013428, + "eval_logps/chosen": -434.67132568359375, + "eval_logps/rejected": -434.3101806640625, + "eval_loss": 0.6136829853057861, + "eval_rewards/accuracies": 0.6604999899864197, + "eval_rewards/chosen": -1.019219160079956, + "eval_rewards/margins": 0.3127825856208801, + "eval_rewards/rejected": -1.3320015668869019, + "eval_runtime": 196.7982, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 5.081, + "step": 3450 + }, + { + "epoch": 0.45, + "learning_rate": 3.33247107633384e-06, + "logits/chosen": -2.6482961177825928, + "logits/rejected": -2.6445212364196777, + "logps/chosen": -420.53955078125, + "logps/rejected": -450.9517517089844, + "loss": 0.5646, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.8963683843612671, + "rewards/margins": 0.42489439249038696, + "rewards/rejected": -1.3212627172470093, + "step": 3460 + }, + { + "epoch": 0.45, + "eval_logits/chosen": -2.4395406246185303, + "eval_logits/rejected": -2.456937551498413, + "eval_logps/chosen": -437.60467529296875, + "eval_logps/rejected": -437.75543212890625, + "eval_loss": 0.6137276887893677, + "eval_rewards/accuracies": 0.6589999794960022, + "eval_rewards/chosen": -1.0485526323318481, + "eval_rewards/margins": 0.3179013133049011, + "eval_rewards/rejected": -1.3664538860321045, + "eval_runtime": 196.9894, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.076, + "step": 3460 + }, + { + "epoch": 0.45, + "learning_rate": 3.3216919739339155e-06, + "logits/chosen": -2.645444869995117, + "logits/rejected": -2.592423677444458, + "logps/chosen": -463.4039001464844, + "logps/rejected": -436.16583251953125, + "loss": 0.5993, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.0509432554244995, + "rewards/margins": 0.3933911621570587, + "rewards/rejected": -1.4443343877792358, + "step": 3470 + }, + { + "epoch": 0.45, + "eval_logits/chosen": -2.4293198585510254, + "eval_logits/rejected": -2.4467997550964355, + "eval_logps/chosen": -438.691162109375, + "eval_logps/rejected": -439.1224060058594, + "eval_loss": 0.6141647696495056, + "eval_rewards/accuracies": 0.6614999771118164, + "eval_rewards/chosen": -1.0594172477722168, + "eval_rewards/margins": 0.3207065761089325, + "eval_rewards/rejected": -1.3801236152648926, + "eval_runtime": 196.8419, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 3470 + }, + { + "epoch": 0.46, + "learning_rate": 3.310895718683635e-06, + "logits/chosen": -2.6264524459838867, + "logits/rejected": -2.636923313140869, + "logps/chosen": -471.2666015625, + "logps/rejected": -454.32861328125, + "loss": 0.6814, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.0823160409927368, + "rewards/margins": 0.1945910006761551, + "rewards/rejected": -1.2769070863723755, + "step": 3480 + }, + { + "epoch": 0.46, + "eval_logits/chosen": -2.4283411502838135, + "eval_logits/rejected": -2.4459471702575684, + "eval_logps/chosen": -436.0476989746094, + "eval_logps/rejected": -436.1694641113281, + "eval_loss": 0.6140268445014954, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -1.0329828262329102, + "eval_rewards/margins": 0.31761178374290466, + "eval_rewards/rejected": -1.3505945205688477, + "eval_runtime": 197.0042, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 3480 + }, + { + "epoch": 0.46, + "learning_rate": 3.3000825359552256e-06, + "logits/chosen": -2.6396970748901367, + "logits/rejected": -2.6334455013275146, + "logps/chosen": -437.2066955566406, + "logps/rejected": -457.81744384765625, + "loss": 0.6004, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.9559956789016724, + "rewards/margins": 0.3328457176685333, + "rewards/rejected": -1.2888413667678833, + "step": 3490 + }, + { + "epoch": 0.46, + "eval_logits/chosen": -2.431947946548462, + "eval_logits/rejected": -2.449997663497925, + "eval_logps/chosen": -431.141845703125, + "eval_logps/rejected": -430.640625, + "eval_loss": 0.6140121221542358, + "eval_rewards/accuracies": 0.6644999980926514, + "eval_rewards/chosen": -0.9839242100715637, + "eval_rewards/margins": 0.31138184666633606, + "eval_rewards/rejected": -1.2953060865402222, + "eval_runtime": 197.1838, + "eval_samples_per_second": 10.143, + "eval_steps_per_second": 5.071, + "step": 3490 + }, + { + "epoch": 0.46, + "learning_rate": 3.2892526514742778e-06, + "logits/chosen": -2.6109142303466797, + "logits/rejected": -2.5949742794036865, + "logps/chosen": -440.78692626953125, + "logps/rejected": -423.1656188964844, + "loss": 0.6039, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.998335063457489, + "rewards/margins": 0.3294587731361389, + "rewards/rejected": -1.327793836593628, + "step": 3500 + }, + { + "epoch": 0.46, + "eval_logits/chosen": -2.4382123947143555, + "eval_logits/rejected": -2.4562125205993652, + "eval_logps/chosen": -426.1593322753906, + "eval_logps/rejected": -425.08843994140625, + "eval_loss": 0.6142340302467346, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -0.9340996742248535, + "eval_rewards/margins": 0.30568426847457886, + "eval_rewards/rejected": -1.2397838830947876, + "eval_runtime": 196.947, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.078, + "step": 3500 + }, + { + "epoch": 0.46, + "learning_rate": 3.27840629131503e-06, + "logits/chosen": -2.6633572578430176, + "logits/rejected": -2.6355559825897217, + "logps/chosen": -450.248291015625, + "logps/rejected": -450.91748046875, + "loss": 0.5735, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.9890987277030945, + "rewards/margins": 0.45345035195350647, + "rewards/rejected": -1.4425491094589233, + "step": 3510 + }, + { + "epoch": 0.46, + "eval_logits/chosen": -2.434354305267334, + "eval_logits/rejected": -2.4525110721588135, + "eval_logps/chosen": -424.9874267578125, + "eval_logps/rejected": -423.5655212402344, + "eval_loss": 0.614112913608551, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -0.9223799705505371, + "eval_rewards/margins": 0.3021751642227173, + "eval_rewards/rejected": -1.2245551347732544, + "eval_runtime": 196.8662, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.08, + "step": 3510 + }, + { + "epoch": 0.46, + "learning_rate": 3.2675436818956522e-06, + "logits/chosen": -2.647305488586426, + "logits/rejected": -2.6159074306488037, + "logps/chosen": -401.4864501953125, + "logps/rejected": -410.5609436035156, + "loss": 0.6345, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.8999361991882324, + "rewards/margins": 0.1908596307039261, + "rewards/rejected": -1.0907957553863525, + "step": 3520 + }, + { + "epoch": 0.46, + "eval_logits/chosen": -2.4260454177856445, + "eval_logits/rejected": -2.443659782409668, + "eval_logps/chosen": -429.8298645019531, + "eval_logps/rejected": -428.6860656738281, + "eval_loss": 0.6133183836936951, + "eval_rewards/accuracies": 0.6629999876022339, + "eval_rewards/chosen": -0.9708043932914734, + "eval_rewards/margins": 0.30495625734329224, + "eval_rewards/rejected": -1.2757607698440552, + "eval_runtime": 197.0424, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 3520 + }, + { + "epoch": 0.46, + "learning_rate": 3.2566650499735185e-06, + "logits/chosen": -2.506486415863037, + "logits/rejected": -2.539597988128662, + "logps/chosen": -454.28802490234375, + "logps/rejected": -455.85968017578125, + "loss": 0.5534, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.900974452495575, + "rewards/margins": 0.45443105697631836, + "rewards/rejected": -1.355405569076538, + "step": 3530 + }, + { + "epoch": 0.46, + "eval_logits/chosen": -2.4236092567443848, + "eval_logits/rejected": -2.4410953521728516, + "eval_logps/chosen": -433.484619140625, + "eval_logps/rejected": -432.83233642578125, + "eval_loss": 0.613182008266449, + "eval_rewards/accuracies": 0.6625000238418579, + "eval_rewards/chosen": -1.0073524713516235, + "eval_rewards/margins": 0.3098709285259247, + "eval_rewards/rejected": -1.317223310470581, + "eval_runtime": 197.0012, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 3530 + }, + { + "epoch": 0.46, + "learning_rate": 3.2457706226404715e-06, + "logits/chosen": -2.5730178356170654, + "logits/rejected": -2.5727334022521973, + "logps/chosen": -440.7850036621094, + "logps/rejected": -412.87457275390625, + "loss": 0.6593, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -1.0789464712142944, + "rewards/margins": 0.23157748579978943, + "rewards/rejected": -1.3105241060256958, + "step": 3540 + }, + { + "epoch": 0.46, + "eval_logits/chosen": -2.4274179935455322, + "eval_logits/rejected": -2.4444172382354736, + "eval_logps/chosen": -436.7542724609375, + "eval_logps/rejected": -436.4505615234375, + "eval_loss": 0.6123189926147461, + "eval_rewards/accuracies": 0.6635000109672546, + "eval_rewards/chosen": -1.0400488376617432, + "eval_rewards/margins": 0.31335678696632385, + "eval_rewards/rejected": -1.3534057140350342, + "eval_runtime": 196.985, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 3540 + }, + { + "epoch": 0.46, + "learning_rate": 3.2348606273180847e-06, + "logits/chosen": -2.6839632987976074, + "logits/rejected": -2.6603915691375732, + "logps/chosen": -475.0283203125, + "logps/rejected": -411.526611328125, + "loss": 0.5675, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.9772094488143921, + "rewards/margins": 0.3736717700958252, + "rewards/rejected": -1.3508812189102173, + "step": 3550 + }, + { + "epoch": 0.46, + "eval_logits/chosen": -2.4247100353240967, + "eval_logits/rejected": -2.441316604614258, + "eval_logps/chosen": -440.3761901855469, + "eval_logps/rejected": -440.4826354980469, + "eval_loss": 0.6120977401733398, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -1.0762676000595093, + "eval_rewards/margins": 0.3174583613872528, + "eval_rewards/rejected": -1.3937259912490845, + "eval_runtime": 197.3208, + "eval_samples_per_second": 10.136, + "eval_steps_per_second": 5.068, + "step": 3550 + }, + { + "epoch": 0.47, + "learning_rate": 3.2239352917529165e-06, + "logits/chosen": -2.709627389907837, + "logits/rejected": -2.689507246017456, + "logps/chosen": -493.081298828125, + "logps/rejected": -499.88482666015625, + "loss": 0.5771, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.0717947483062744, + "rewards/margins": 0.4040806293487549, + "rewards/rejected": -1.4758752584457397, + "step": 3560 + }, + { + "epoch": 0.47, + "eval_logits/chosen": -2.418405055999756, + "eval_logits/rejected": -2.4349021911621094, + "eval_logps/chosen": -442.52825927734375, + "eval_logps/rejected": -443.28399658203125, + "eval_loss": 0.61195307970047, + "eval_rewards/accuracies": 0.6635000109672546, + "eval_rewards/chosen": -1.0977885723114014, + "eval_rewards/margins": 0.32395121455192566, + "eval_rewards/rejected": -1.4217398166656494, + "eval_runtime": 196.8612, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.08, + "step": 3560 + }, + { + "epoch": 0.47, + "learning_rate": 3.2129948440117487e-06, + "logits/chosen": -2.692121744155884, + "logits/rejected": -2.6730172634124756, + "logps/chosen": -423.61553955078125, + "logps/rejected": -441.45556640625, + "loss": 0.5887, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.0229573249816895, + "rewards/margins": 0.3558308482170105, + "rewards/rejected": -1.3787882328033447, + "step": 3570 + }, + { + "epoch": 0.47, + "eval_logits/chosen": -2.4225990772247314, + "eval_logits/rejected": -2.438905715942383, + "eval_logps/chosen": -442.7872619628906, + "eval_logps/rejected": -443.790283203125, + "eval_loss": 0.6118631362915039, + "eval_rewards/accuracies": 0.6650000214576721, + "eval_rewards/chosen": -1.100378155708313, + "eval_rewards/margins": 0.3264242112636566, + "eval_rewards/rejected": -1.4268025159835815, + "eval_runtime": 196.93, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 3570 + }, + { + "epoch": 0.47, + "learning_rate": 3.202039512476833e-06, + "logits/chosen": -2.5658066272735596, + "logits/rejected": -2.5501656532287598, + "logps/chosen": -401.03814697265625, + "logps/rejected": -433.0650939941406, + "loss": 0.5473, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.0611445903778076, + "rewards/margins": 0.4310608506202698, + "rewards/rejected": -1.4922053813934326, + "step": 3580 + }, + { + "epoch": 0.47, + "eval_logits/chosen": -2.418958902359009, + "eval_logits/rejected": -2.4356112480163574, + "eval_logps/chosen": -442.1664733886719, + "eval_logps/rejected": -443.41705322265625, + "eval_loss": 0.6123986840248108, + "eval_rewards/accuracies": 0.6620000004768372, + "eval_rewards/chosen": -1.0941705703735352, + "eval_rewards/margins": 0.32889971137046814, + "eval_rewards/rejected": -1.4230701923370361, + "eval_runtime": 196.9636, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 3580 + }, + { + "epoch": 0.47, + "learning_rate": 3.1910695258411216e-06, + "logits/chosen": -2.648796319961548, + "logits/rejected": -2.595101833343506, + "logps/chosen": -427.6651306152344, + "logps/rejected": -396.94354248046875, + "loss": 0.5784, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.957931637763977, + "rewards/margins": 0.39088284969329834, + "rewards/rejected": -1.3488144874572754, + "step": 3590 + }, + { + "epoch": 0.47, + "eval_logits/chosen": -2.427513360977173, + "eval_logits/rejected": -2.4447529315948486, + "eval_logps/chosen": -439.5167541503906, + "eval_logps/rejected": -440.6925354003906, + "eval_loss": 0.6133009195327759, + "eval_rewards/accuracies": 0.6604999899864197, + "eval_rewards/chosen": -1.0676734447479248, + "eval_rewards/margins": 0.32815155386924744, + "eval_rewards/rejected": -1.395824909210205, + "eval_runtime": 196.9972, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 3590 + }, + { + "epoch": 0.47, + "learning_rate": 3.1800851131034904e-06, + "logits/chosen": -2.6219043731689453, + "logits/rejected": -2.624768018722534, + "logps/chosen": -436.5489807128906, + "logps/rejected": -426.3831481933594, + "loss": 0.6345, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.1452642679214478, + "rewards/margins": 0.3178446590900421, + "rewards/rejected": -1.463108777999878, + "step": 3600 + }, + { + "epoch": 0.47, + "eval_logits/chosen": -2.4401590824127197, + "eval_logits/rejected": -2.4576549530029297, + "eval_logps/chosen": -435.9314880371094, + "eval_logps/rejected": -436.886962890625, + "eval_loss": 0.6135310530662537, + "eval_rewards/accuracies": 0.6625000238418579, + "eval_rewards/chosen": -1.0318211317062378, + "eval_rewards/margins": 0.3259483575820923, + "eval_rewards/rejected": -1.35776948928833, + "eval_runtime": 196.9714, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 3600 + }, + { + "epoch": 0.47, + "learning_rate": 3.169086503563962e-06, + "logits/chosen": -2.6728599071502686, + "logits/rejected": -2.660001516342163, + "logps/chosen": -411.0406188964844, + "logps/rejected": -456.24920654296875, + "loss": 0.6347, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.9420035481452942, + "rewards/margins": 0.2699907124042511, + "rewards/rejected": -1.2119942903518677, + "step": 3610 + }, + { + "epoch": 0.47, + "eval_logits/chosen": -2.4506571292877197, + "eval_logits/rejected": -2.468738079071045, + "eval_logps/chosen": -429.9488220214844, + "eval_logps/rejected": -430.1793212890625, + "eval_loss": 0.6143242120742798, + "eval_rewards/accuracies": 0.6620000004768372, + "eval_rewards/chosen": -0.9719939827919006, + "eval_rewards/margins": 0.3186990022659302, + "eval_rewards/rejected": -1.2906930446624756, + "eval_runtime": 197.4584, + "eval_samples_per_second": 10.129, + "eval_steps_per_second": 5.064, + "step": 3610 + }, + { + "epoch": 0.47, + "learning_rate": 3.1580739268189165e-06, + "logits/chosen": -2.660468578338623, + "logits/rejected": -2.6029036045074463, + "logps/chosen": -440.058349609375, + "logps/rejected": -432.2239685058594, + "loss": 0.5758, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.9574350118637085, + "rewards/margins": 0.4528959393501282, + "rewards/rejected": -1.4103310108184814, + "step": 3620 + }, + { + "epoch": 0.47, + "eval_logits/chosen": -2.45180344581604, + "eval_logits/rejected": -2.470142364501953, + "eval_logps/chosen": -428.6710205078125, + "eval_logps/rejected": -428.8226623535156, + "eval_loss": 0.6142221093177795, + "eval_rewards/accuracies": 0.6660000085830688, + "eval_rewards/chosen": -0.959216296672821, + "eval_rewards/margins": 0.3179102838039398, + "eval_rewards/rejected": -1.2771265506744385, + "eval_runtime": 196.9598, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 3620 + }, + { + "epoch": 0.48, + "learning_rate": 3.147047612756302e-06, + "logits/chosen": -2.6150004863739014, + "logits/rejected": -2.660050630569458, + "logps/chosen": -448.8761291503906, + "logps/rejected": -477.34722900390625, + "loss": 0.5851, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.8641443252563477, + "rewards/margins": 0.3695284128189087, + "rewards/rejected": -1.233672857284546, + "step": 3630 + }, + { + "epoch": 0.48, + "eval_logits/chosen": -2.442091226577759, + "eval_logits/rejected": -2.460456132888794, + "eval_logps/chosen": -430.6168212890625, + "eval_logps/rejected": -431.0765686035156, + "eval_loss": 0.6144885420799255, + "eval_rewards/accuracies": 0.6690000295639038, + "eval_rewards/chosen": -0.9786739349365234, + "eval_rewards/margins": 0.3209916651248932, + "eval_rewards/rejected": -1.2996655702590942, + "eval_runtime": 196.9581, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 3630 + }, + { + "epoch": 0.48, + "learning_rate": 3.136007791550833e-06, + "logits/chosen": -2.544302463531494, + "logits/rejected": -2.532585859298706, + "logps/chosen": -399.3792724609375, + "logps/rejected": -384.9727478027344, + "loss": 0.5792, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.9436852335929871, + "rewards/margins": 0.408308744430542, + "rewards/rejected": -1.3519941568374634, + "step": 3640 + }, + { + "epoch": 0.48, + "eval_logits/chosen": -2.4333150386810303, + "eval_logits/rejected": -2.451690196990967, + "eval_logps/chosen": -435.148193359375, + "eval_logps/rejected": -436.144775390625, + "eval_loss": 0.6144400238990784, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -1.0239877700805664, + "eval_rewards/margins": 0.32635965943336487, + "eval_rewards/rejected": -1.3503473997116089, + "eval_runtime": 196.9216, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 3640 + }, + { + "epoch": 0.48, + "learning_rate": 3.1249546936591848e-06, + "logits/chosen": -2.6114816665649414, + "logits/rejected": -2.5710997581481934, + "logps/chosen": -390.9961242675781, + "logps/rejected": -406.75762939453125, + "loss": 0.6328, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.0057404041290283, + "rewards/margins": 0.2645101547241211, + "rewards/rejected": -1.2702504396438599, + "step": 3650 + }, + { + "epoch": 0.48, + "eval_logits/chosen": -2.434093475341797, + "eval_logits/rejected": -2.4516608715057373, + "eval_logps/chosen": -440.60430908203125, + "eval_logps/rejected": -442.5193176269531, + "eval_loss": 0.6136277318000793, + "eval_rewards/accuracies": 0.6629999876022339, + "eval_rewards/chosen": -1.0785490274429321, + "eval_rewards/margins": 0.335544228553772, + "eval_rewards/rejected": -1.4140933752059937, + "eval_runtime": 197.0275, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 3650 + }, + { + "epoch": 0.48, + "learning_rate": 3.1138885498151843e-06, + "logits/chosen": -2.520498275756836, + "logits/rejected": -2.5581088066101074, + "logps/chosen": -451.1759338378906, + "logps/rejected": -459.38348388671875, + "loss": 0.5169, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.0737239122390747, + "rewards/margins": 0.6751200556755066, + "rewards/rejected": -1.7488439083099365, + "step": 3660 + }, + { + "epoch": 0.48, + "eval_logits/chosen": -2.419292449951172, + "eval_logits/rejected": -2.4361040592193604, + "eval_logps/chosen": -449.6213073730469, + "eval_logps/rejected": -452.7323303222656, + "eval_loss": 0.6136282682418823, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -1.1687185764312744, + "eval_rewards/margins": 0.34750431776046753, + "eval_rewards/rejected": -1.5162231922149658, + "eval_runtime": 196.9553, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 3660 + }, + { + "epoch": 0.48, + "learning_rate": 3.1028095910249937e-06, + "logits/chosen": -2.7278361320495605, + "logits/rejected": -2.664435863494873, + "logps/chosen": -457.93817138671875, + "logps/rejected": -419.0298767089844, + "loss": 0.5708, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.0790977478027344, + "rewards/margins": 0.4191462993621826, + "rewards/rejected": -1.498244047164917, + "step": 3670 + }, + { + "epoch": 0.48, + "eval_logits/chosen": -2.4101860523223877, + "eval_logits/rejected": -2.4262466430664062, + "eval_logps/chosen": -454.6792297363281, + "eval_logps/rejected": -458.39801025390625, + "eval_loss": 0.6136077642440796, + "eval_rewards/accuracies": 0.6650000214576721, + "eval_rewards/chosen": -1.219298005104065, + "eval_rewards/margins": 0.3535817563533783, + "eval_rewards/rejected": -1.5728797912597656, + "eval_runtime": 196.997, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 3670 + }, + { + "epoch": 0.48, + "learning_rate": 3.0917180485622895e-06, + "logits/chosen": -2.551952362060547, + "logits/rejected": -2.5245137214660645, + "logps/chosen": -446.6317443847656, + "logps/rejected": -429.69891357421875, + "loss": 0.6218, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.197405219078064, + "rewards/margins": 0.4143539071083069, + "rewards/rejected": -1.6117591857910156, + "step": 3680 + }, + { + "epoch": 0.48, + "eval_logits/chosen": -2.4147117137908936, + "eval_logits/rejected": -2.4315760135650635, + "eval_logps/chosen": -444.9193115234375, + "eval_logps/rejected": -447.4606018066406, + "eval_loss": 0.6137916445732117, + "eval_rewards/accuracies": 0.6700000166893005, + "eval_rewards/chosen": -1.1216992139816284, + "eval_rewards/margins": 0.34180694818496704, + "eval_rewards/rejected": -1.4635063409805298, + "eval_runtime": 197.2281, + "eval_samples_per_second": 10.141, + "eval_steps_per_second": 5.07, + "step": 3680 + }, + { + "epoch": 0.48, + "learning_rate": 3.0806141539634294e-06, + "logits/chosen": -2.624244213104248, + "logits/rejected": -2.615341901779175, + "logps/chosen": -415.6836853027344, + "logps/rejected": -387.3684997558594, + "loss": 0.6159, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.073737382888794, + "rewards/margins": 0.3058207631111145, + "rewards/rejected": -1.3795579671859741, + "step": 3690 + }, + { + "epoch": 0.48, + "eval_logits/chosen": -2.4173150062561035, + "eval_logits/rejected": -2.434377908706665, + "eval_logps/chosen": -437.3270568847656, + "eval_logps/rejected": -438.8552551269531, + "eval_loss": 0.6134853363037109, + "eval_rewards/accuracies": 0.6644999980926514, + "eval_rewards/chosen": -1.0457768440246582, + "eval_rewards/margins": 0.33167514204978943, + "eval_rewards/rejected": -1.37745201587677, + "eval_runtime": 196.9768, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 3690 + }, + { + "epoch": 0.48, + "learning_rate": 3.069498139022624e-06, + "logits/chosen": -2.7119345664978027, + "logits/rejected": -2.6447341442108154, + "logps/chosen": -443.99627685546875, + "logps/rejected": -411.5511779785156, + "loss": 0.6424, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.0855329036712646, + "rewards/margins": 0.26062411069869995, + "rewards/rejected": -1.3461570739746094, + "step": 3700 + }, + { + "epoch": 0.48, + "eval_logits/chosen": -2.4126713275909424, + "eval_logits/rejected": -2.429412364959717, + "eval_logps/chosen": -434.31158447265625, + "eval_logps/rejected": -435.1674499511719, + "eval_loss": 0.6130565404891968, + "eval_rewards/accuracies": 0.6690000295639038, + "eval_rewards/chosen": -1.0156217813491821, + "eval_rewards/margins": 0.3249521553516388, + "eval_rewards/rejected": -1.3405741453170776, + "eval_runtime": 197.1047, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.073, + "step": 3700 + }, + { + "epoch": 0.49, + "learning_rate": 3.0583702357870964e-06, + "logits/chosen": -2.613340139389038, + "logits/rejected": -2.623927593231201, + "logps/chosen": -476.4661560058594, + "logps/rejected": -504.0572204589844, + "loss": 0.6458, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.0363906621932983, + "rewards/margins": 0.25275081396102905, + "rewards/rejected": -1.2891414165496826, + "step": 3710 + }, + { + "epoch": 0.49, + "eval_logits/chosen": -2.4173309803009033, + "eval_logits/rejected": -2.4342410564422607, + "eval_logps/chosen": -429.7125549316406, + "eval_logps/rejected": -429.9491882324219, + "eval_loss": 0.6135927438735962, + "eval_rewards/accuracies": 0.6664999723434448, + "eval_rewards/chosen": -0.9696312546730042, + "eval_rewards/margins": 0.31876012682914734, + "eval_rewards/rejected": -1.288391351699829, + "eval_runtime": 197.0089, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 3710 + }, + { + "epoch": 0.49, + "learning_rate": 3.0472306765522393e-06, + "logits/chosen": -2.6709144115448, + "logits/rejected": -2.689739465713501, + "logps/chosen": -409.62322998046875, + "logps/rejected": -401.40167236328125, + "loss": 0.6061, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.9307082295417786, + "rewards/margins": 0.3507843315601349, + "rewards/rejected": -1.2814924716949463, + "step": 3720 + }, + { + "epoch": 0.49, + "eval_logits/chosen": -2.427380323410034, + "eval_logits/rejected": -2.4437131881713867, + "eval_logps/chosen": -426.92364501953125, + "eval_logps/rejected": -426.4053955078125, + "eval_loss": 0.6134113073348999, + "eval_rewards/accuracies": 0.6625000238418579, + "eval_rewards/chosen": -0.9417427778244019, + "eval_rewards/margins": 0.311210960149765, + "eval_rewards/rejected": -1.2529538869857788, + "eval_runtime": 196.8867, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 3720 + }, + { + "epoch": 0.49, + "learning_rate": 3.0360796938567628e-06, + "logits/chosen": -2.6675527095794678, + "logits/rejected": -2.625060558319092, + "logps/chosen": -424.24627685546875, + "logps/rejected": -415.8722229003906, + "loss": 0.5655, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.9266021847724915, + "rewards/margins": 0.4293970465660095, + "rewards/rejected": -1.355999231338501, + "step": 3730 + }, + { + "epoch": 0.49, + "eval_logits/chosen": -2.4288480281829834, + "eval_logits/rejected": -2.444474697113037, + "eval_logps/chosen": -431.9655456542969, + "eval_logps/rejected": -432.1391296386719, + "eval_loss": 0.612882137298584, + "eval_rewards/accuracies": 0.6585000157356262, + "eval_rewards/chosen": -0.9921613335609436, + "eval_rewards/margins": 0.3181297183036804, + "eval_rewards/rejected": -1.310291051864624, + "eval_runtime": 196.9777, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 3730 + }, + { + "epoch": 0.49, + "learning_rate": 3.0249175204778435e-06, + "logits/chosen": -2.667661190032959, + "logits/rejected": -2.638627052307129, + "logps/chosen": -424.33477783203125, + "logps/rejected": -438.136962890625, + "loss": 0.5771, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9603897333145142, + "rewards/margins": 0.4101681113243103, + "rewards/rejected": -1.3705580234527588, + "step": 3740 + }, + { + "epoch": 0.49, + "eval_logits/chosen": -2.425194025039673, + "eval_logits/rejected": -2.4405641555786133, + "eval_logps/chosen": -436.3723449707031, + "eval_logps/rejected": -437.3710021972656, + "eval_loss": 0.6123316287994385, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -1.0362294912338257, + "eval_rewards/margins": 0.3263804614543915, + "eval_rewards/rejected": -1.36260986328125, + "eval_runtime": 196.9857, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 3740 + }, + { + "epoch": 0.49, + "learning_rate": 3.0137443894262634e-06, + "logits/chosen": -2.5059690475463867, + "logits/rejected": -2.450510025024414, + "logps/chosen": -441.3265686035156, + "logps/rejected": -425.53814697265625, + "loss": 0.545, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.0120147466659546, + "rewards/margins": 0.498440682888031, + "rewards/rejected": -1.5104554891586304, + "step": 3750 + }, + { + "epoch": 0.49, + "eval_logits/chosen": -2.417469024658203, + "eval_logits/rejected": -2.432871103286743, + "eval_logps/chosen": -442.3606262207031, + "eval_logps/rejected": -444.4958190917969, + "eval_loss": 0.6120953559875488, + "eval_rewards/accuracies": 0.6679999828338623, + "eval_rewards/chosen": -1.0961120128631592, + "eval_rewards/margins": 0.33774587512016296, + "eval_rewards/rejected": -1.4338579177856445, + "eval_runtime": 197.4301, + "eval_samples_per_second": 10.13, + "eval_steps_per_second": 5.065, + "step": 3750 + }, + { + "epoch": 0.49, + "learning_rate": 3.0025605339415476e-06, + "logits/chosen": -2.5999059677124023, + "logits/rejected": -2.57336163520813, + "logps/chosen": -444.1121520996094, + "logps/rejected": -437.1282653808594, + "loss": 0.5936, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.0731585025787354, + "rewards/margins": 0.3845066428184509, + "rewards/rejected": -1.4576650857925415, + "step": 3760 + }, + { + "epoch": 0.49, + "eval_logits/chosen": -2.40948486328125, + "eval_logits/rejected": -2.424887180328369, + "eval_logps/chosen": -446.61859130859375, + "eval_logps/rejected": -449.55389404296875, + "eval_loss": 0.6122823357582092, + "eval_rewards/accuracies": 0.6650000214576721, + "eval_rewards/chosen": -1.1386919021606445, + "eval_rewards/margins": 0.3457469046115875, + "eval_rewards/rejected": -1.4844387769699097, + "eval_runtime": 196.8917, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 3760 + }, + { + "epoch": 0.49, + "learning_rate": 2.9913661874870923e-06, + "logits/chosen": -2.5459322929382324, + "logits/rejected": -2.5608432292938232, + "logps/chosen": -435.58148193359375, + "logps/rejected": -438.0994567871094, + "loss": 0.5423, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.134264349937439, + "rewards/margins": 0.44381627440452576, + "rewards/rejected": -1.5780807733535767, + "step": 3770 + }, + { + "epoch": 0.49, + "eval_logits/chosen": -2.3963429927825928, + "eval_logits/rejected": -2.4115374088287354, + "eval_logps/chosen": -453.9317321777344, + "eval_logps/rejected": -457.8913269042969, + "eval_loss": 0.6133984327316284, + "eval_rewards/accuracies": 0.6675000190734863, + "eval_rewards/chosen": -1.2118229866027832, + "eval_rewards/margins": 0.3559902310371399, + "eval_rewards/rejected": -1.5678132772445679, + "eval_runtime": 196.9816, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 3770 + }, + { + "epoch": 0.49, + "learning_rate": 2.980161583745294e-06, + "logits/chosen": -2.5888137817382812, + "logits/rejected": -2.574763774871826, + "logps/chosen": -495.31396484375, + "logps/rejected": -487.2955627441406, + "loss": 0.5582, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.2105467319488525, + "rewards/margins": 0.5193046927452087, + "rewards/rejected": -1.729851484298706, + "step": 3780 + }, + { + "epoch": 0.49, + "eval_logits/chosen": -2.388474225997925, + "eval_logits/rejected": -2.4033782482147217, + "eval_logps/chosen": -462.8208312988281, + "eval_logps/rejected": -467.9822692871094, + "eval_loss": 0.6143119931221008, + "eval_rewards/accuracies": 0.6664999723434448, + "eval_rewards/chosen": -1.3007144927978516, + "eval_rewards/margins": 0.3680078089237213, + "eval_rewards/rejected": -1.66872239112854, + "eval_runtime": 197.0702, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.074, + "step": 3780 + }, + { + "epoch": 0.5, + "learning_rate": 2.96894695661267e-06, + "logits/chosen": -2.604504346847534, + "logits/rejected": -2.552913188934326, + "logps/chosen": -500.4588317871094, + "logps/rejected": -461.9434509277344, + "loss": 0.6335, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.3058234453201294, + "rewards/margins": 0.26627081632614136, + "rewards/rejected": -1.572094202041626, + "step": 3790 + }, + { + "epoch": 0.5, + "eval_logits/chosen": -2.3936386108398438, + "eval_logits/rejected": -2.4086356163024902, + "eval_logps/chosen": -459.9853515625, + "eval_logps/rejected": -464.7911071777344, + "eval_loss": 0.6135148406028748, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -1.2723592519760132, + "eval_rewards/margins": 0.364451140165329, + "eval_rewards/rejected": -1.636810302734375, + "eval_runtime": 196.8777, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.079, + "step": 3790 + }, + { + "epoch": 0.5, + "learning_rate": 2.9577225401949773e-06, + "logits/chosen": -2.5141632556915283, + "logits/rejected": -2.5253939628601074, + "logps/chosen": -403.39288330078125, + "logps/rejected": -421.74432373046875, + "loss": 0.6201, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.193432092666626, + "rewards/margins": 0.3321394920349121, + "rewards/rejected": -1.525571346282959, + "step": 3800 + }, + { + "epoch": 0.5, + "eval_logits/chosen": -2.402308940887451, + "eval_logits/rejected": -2.417587995529175, + "eval_logps/chosen": -453.1758117675781, + "eval_logps/rejected": -457.08599853515625, + "eval_loss": 0.6127331256866455, + "eval_rewards/accuracies": 0.6685000061988831, + "eval_rewards/chosen": -1.2042638063430786, + "eval_rewards/margins": 0.35549601912498474, + "eval_rewards/rejected": -1.5597598552703857, + "eval_runtime": 197.0682, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.074, + "step": 3800 + }, + { + "epoch": 0.5, + "learning_rate": 2.946488568802324e-06, + "logits/chosen": -2.5278308391571045, + "logits/rejected": -2.468945264816284, + "logps/chosen": -459.3369140625, + "logps/rejected": -458.6595764160156, + "loss": 0.6459, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.274712324142456, + "rewards/margins": 0.2448592483997345, + "rewards/rejected": -1.5195715427398682, + "step": 3810 + }, + { + "epoch": 0.5, + "eval_logits/chosen": -2.4084715843200684, + "eval_logits/rejected": -2.4238317012786865, + "eval_logps/chosen": -447.7943420410156, + "eval_logps/rejected": -451.07440185546875, + "eval_loss": 0.6117669939994812, + "eval_rewards/accuracies": 0.6690000295639038, + "eval_rewards/chosen": -1.150449514389038, + "eval_rewards/margins": 0.34919407963752747, + "eval_rewards/rejected": -1.4996436834335327, + "eval_runtime": 196.904, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 3810 + }, + { + "epoch": 0.5, + "learning_rate": 2.935245276944278e-06, + "logits/chosen": -2.5466935634613037, + "logits/rejected": -2.574474811553955, + "logps/chosen": -471.45513916015625, + "logps/rejected": -456.5826110839844, + "loss": 0.6382, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.0596699714660645, + "rewards/margins": 0.280234158039093, + "rewards/rejected": -1.3399040699005127, + "step": 3820 + }, + { + "epoch": 0.5, + "eval_logits/chosen": -2.417717218399048, + "eval_logits/rejected": -2.4337222576141357, + "eval_logps/chosen": -441.5482177734375, + "eval_logps/rejected": -443.9344482421875, + "eval_loss": 0.6117742657661438, + "eval_rewards/accuracies": 0.6625000238418579, + "eval_rewards/chosen": -1.0879883766174316, + "eval_rewards/margins": 0.3402560353279114, + "eval_rewards/rejected": -1.4282443523406982, + "eval_runtime": 197.043, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 3820 + }, + { + "epoch": 0.5, + "learning_rate": 2.9239928993249723e-06, + "logits/chosen": -2.602570056915283, + "logits/rejected": -2.574509382247925, + "logps/chosen": -433.97515869140625, + "logps/rejected": -443.5970153808594, + "loss": 0.5423, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.881142258644104, + "rewards/margins": 0.5500217080116272, + "rewards/rejected": -1.431164026260376, + "step": 3830 + }, + { + "epoch": 0.5, + "eval_logits/chosen": -2.4167044162750244, + "eval_logits/rejected": -2.433227777481079, + "eval_logps/chosen": -440.26531982421875, + "eval_logps/rejected": -442.4804382324219, + "eval_loss": 0.6125693321228027, + "eval_rewards/accuracies": 0.6629999876022339, + "eval_rewards/chosen": -1.075158715248108, + "eval_rewards/margins": 0.3385455012321472, + "eval_rewards/rejected": -1.4137042760849, + "eval_runtime": 197.1655, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 3830 + }, + { + "epoch": 0.5, + "learning_rate": 2.912731670838207e-06, + "logits/chosen": -2.550351858139038, + "logits/rejected": -2.545172691345215, + "logps/chosen": -422.2438049316406, + "logps/rejected": -444.660888671875, + "loss": 0.6351, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.0034587383270264, + "rewards/margins": 0.2920977473258972, + "rewards/rejected": -1.295556664466858, + "step": 3840 + }, + { + "epoch": 0.5, + "eval_logits/chosen": -2.4187636375427246, + "eval_logits/rejected": -2.4356865882873535, + "eval_logps/chosen": -438.0135498046875, + "eval_logps/rejected": -440.0002136230469, + "eval_loss": 0.6129617691040039, + "eval_rewards/accuracies": 0.6625000238418579, + "eval_rewards/chosen": -1.0526416301727295, + "eval_rewards/margins": 0.33625999093055725, + "eval_rewards/rejected": -1.3889015913009644, + "eval_runtime": 196.7803, + "eval_samples_per_second": 10.164, + "eval_steps_per_second": 5.082, + "step": 3840 + }, + { + "epoch": 0.5, + "learning_rate": 2.901461826562543e-06, + "logits/chosen": -2.6022095680236816, + "logits/rejected": -2.608586311340332, + "logps/chosen": -382.9307556152344, + "logps/rejected": -402.4649353027344, + "loss": 0.5856, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.0170080661773682, + "rewards/margins": 0.3895450234413147, + "rewards/rejected": -1.4065531492233276, + "step": 3850 + }, + { + "epoch": 0.5, + "eval_logits/chosen": -2.4154767990112305, + "eval_logits/rejected": -2.4327642917633057, + "eval_logps/chosen": -433.6244812011719, + "eval_logps/rejected": -435.01007080078125, + "eval_loss": 0.6131948232650757, + "eval_rewards/accuracies": 0.6614999771118164, + "eval_rewards/chosen": -1.0087506771087646, + "eval_rewards/margins": 0.3302498161792755, + "eval_rewards/rejected": -1.3390004634857178, + "eval_runtime": 197.0186, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.076, + "step": 3850 + }, + { + "epoch": 0.51, + "learning_rate": 2.8901836017563966e-06, + "logits/chosen": -2.5830795764923096, + "logits/rejected": -2.559356689453125, + "logps/chosen": -422.36932373046875, + "logps/rejected": -424.05303955078125, + "loss": 0.6039, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.953314483165741, + "rewards/margins": 0.3424040675163269, + "rewards/rejected": -1.2957185506820679, + "step": 3860 + }, + { + "epoch": 0.51, + "eval_logits/chosen": -2.411612033843994, + "eval_logits/rejected": -2.4291622638702393, + "eval_logps/chosen": -432.36639404296875, + "eval_logps/rejected": -433.6270446777344, + "eval_loss": 0.6130424737930298, + "eval_rewards/accuracies": 0.6644999980926514, + "eval_rewards/chosen": -0.9961698055267334, + "eval_rewards/margins": 0.32900041341781616, + "eval_rewards/rejected": -1.3251702785491943, + "eval_runtime": 196.9225, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 3860 + }, + { + "epoch": 0.51, + "learning_rate": 2.8788972318531272e-06, + "logits/chosen": -2.541175127029419, + "logits/rejected": -2.5342342853546143, + "logps/chosen": -417.62567138671875, + "logps/rejected": -431.13104248046875, + "loss": 0.6142, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.095897912979126, + "rewards/margins": 0.3115997314453125, + "rewards/rejected": -1.4074976444244385, + "step": 3870 + }, + { + "epoch": 0.51, + "eval_logits/chosen": -2.402695417404175, + "eval_logits/rejected": -2.4197804927825928, + "eval_logps/chosen": -436.37451171875, + "eval_logps/rejected": -438.23828125, + "eval_loss": 0.6130448579788208, + "eval_rewards/accuracies": 0.6660000085830688, + "eval_rewards/chosen": -1.036251187324524, + "eval_rewards/margins": 0.3350312411785126, + "eval_rewards/rejected": -1.3712825775146484, + "eval_runtime": 197.1842, + "eval_samples_per_second": 10.143, + "eval_steps_per_second": 5.071, + "step": 3870 + }, + { + "epoch": 0.51, + "learning_rate": 2.8676029524561255e-06, + "logits/chosen": -2.5351319313049316, + "logits/rejected": -2.587127447128296, + "logps/chosen": -466.9495544433594, + "logps/rejected": -477.93048095703125, + "loss": 0.6128, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.0128754377365112, + "rewards/margins": 0.34568047523498535, + "rewards/rejected": -1.3585560321807861, + "step": 3880 + }, + { + "epoch": 0.51, + "eval_logits/chosen": -2.406038284301758, + "eval_logits/rejected": -2.422903299331665, + "eval_logps/chosen": -438.3699951171875, + "eval_logps/rejected": -440.46466064453125, + "eval_loss": 0.6125989556312561, + "eval_rewards/accuracies": 0.6685000061988831, + "eval_rewards/chosen": -1.0562056303024292, + "eval_rewards/margins": 0.33734050393104553, + "eval_rewards/rejected": -1.393546223640442, + "eval_runtime": 197.0942, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.074, + "step": 3880 + }, + { + "epoch": 0.51, + "learning_rate": 2.8563009993338906e-06, + "logits/chosen": -2.5570359230041504, + "logits/rejected": -2.5582470893859863, + "logps/chosen": -413.457275390625, + "logps/rejected": -443.176513671875, + "loss": 0.5771, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.0789310932159424, + "rewards/margins": 0.4563199579715729, + "rewards/rejected": -1.5352510213851929, + "step": 3890 + }, + { + "epoch": 0.51, + "eval_logits/chosen": -2.400045871734619, + "eval_logits/rejected": -2.416555643081665, + "eval_logps/chosen": -444.1776428222656, + "eval_logps/rejected": -447.1835632324219, + "eval_loss": 0.6128532290458679, + "eval_rewards/accuracies": 0.6629999876022339, + "eval_rewards/chosen": -1.1142823696136475, + "eval_rewards/margins": 0.3464534878730774, + "eval_rewards/rejected": -1.46073579788208, + "eval_runtime": 197.0771, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 3890 + }, + { + "epoch": 0.51, + "learning_rate": 2.844991608415113e-06, + "logits/chosen": -2.6397032737731934, + "logits/rejected": -2.6185808181762695, + "logps/chosen": -454.1117248535156, + "logps/rejected": -481.529541015625, + "loss": 0.6089, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.205904245376587, + "rewards/margins": 0.36940625309944153, + "rewards/rejected": -1.575310468673706, + "step": 3900 + }, + { + "epoch": 0.51, + "eval_logits/chosen": -2.393319845199585, + "eval_logits/rejected": -2.409630537033081, + "eval_logps/chosen": -447.9149169921875, + "eval_logps/rejected": -451.6171875, + "eval_loss": 0.6128678917884827, + "eval_rewards/accuracies": 0.659500002861023, + "eval_rewards/chosen": -1.1516549587249756, + "eval_rewards/margins": 0.35341697931289673, + "eval_rewards/rejected": -1.5050721168518066, + "eval_runtime": 196.9582, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 3900 + }, + { + "epoch": 0.51, + "learning_rate": 2.833675015783746e-06, + "logits/chosen": -2.552631378173828, + "logits/rejected": -2.571286678314209, + "logps/chosen": -406.80224609375, + "logps/rejected": -457.42413330078125, + "loss": 0.5962, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.2279528379440308, + "rewards/margins": 0.39623597264289856, + "rewards/rejected": -1.624189019203186, + "step": 3910 + }, + { + "epoch": 0.51, + "eval_logits/chosen": -2.378218650817871, + "eval_logits/rejected": -2.3942618370056152, + "eval_logps/chosen": -455.72216796875, + "eval_logps/rejected": -460.4048156738281, + "eval_loss": 0.6134702563285828, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -1.2297275066375732, + "eval_rewards/margins": 0.36322060227394104, + "eval_rewards/rejected": -1.5929479598999023, + "eval_runtime": 197.1673, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 3910 + }, + { + "epoch": 0.51, + "learning_rate": 2.8223514576740784e-06, + "logits/chosen": -2.4648399353027344, + "logits/rejected": -2.447777509689331, + "logps/chosen": -392.42431640625, + "logps/rejected": -459.552001953125, + "loss": 0.6028, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.0850086212158203, + "rewards/margins": 0.3459857106208801, + "rewards/rejected": -1.4309942722320557, + "step": 3920 + }, + { + "epoch": 0.51, + "eval_logits/chosen": -2.3691859245300293, + "eval_logits/rejected": -2.3851804733276367, + "eval_logps/chosen": -459.0703430175781, + "eval_logps/rejected": -464.10882568359375, + "eval_loss": 0.6140798330307007, + "eval_rewards/accuracies": 0.6625000238418579, + "eval_rewards/chosen": -1.263209342956543, + "eval_rewards/margins": 0.3667786419391632, + "eval_rewards/rejected": -1.6299879550933838, + "eval_runtime": 197.0307, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 3920 + }, + { + "epoch": 0.51, + "learning_rate": 2.8110211704658073e-06, + "logits/chosen": -2.529292106628418, + "logits/rejected": -2.50898814201355, + "logps/chosen": -500.024169921875, + "logps/rejected": -481.23046875, + "loss": 0.5829, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.2268387079238892, + "rewards/margins": 0.4068872034549713, + "rewards/rejected": -1.6337261199951172, + "step": 3930 + }, + { + "epoch": 0.51, + "eval_logits/chosen": -2.362501859664917, + "eval_logits/rejected": -2.3783164024353027, + "eval_logps/chosen": -459.94146728515625, + "eval_logps/rejected": -465.1200256347656, + "eval_loss": 0.6143542528152466, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -1.2719205617904663, + "eval_rewards/margins": 0.36817923188209534, + "eval_rewards/rejected": -1.6400996446609497, + "eval_runtime": 196.9832, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 3930 + }, + { + "epoch": 0.52, + "learning_rate": 2.7996843906790955e-06, + "logits/chosen": -2.480191946029663, + "logits/rejected": -2.438917636871338, + "logps/chosen": -436.451904296875, + "logps/rejected": -451.19195556640625, + "loss": 0.6861, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.261759877204895, + "rewards/margins": 0.2249007672071457, + "rewards/rejected": -1.4866605997085571, + "step": 3940 + }, + { + "epoch": 0.52, + "eval_logits/chosen": -2.3652889728546143, + "eval_logits/rejected": -2.380469560623169, + "eval_logps/chosen": -463.49749755859375, + "eval_logps/rejected": -469.0307312011719, + "eval_loss": 0.6125316619873047, + "eval_rewards/accuracies": 0.6685000061988831, + "eval_rewards/chosen": -1.3074814081192017, + "eval_rewards/margins": 0.37172552943229675, + "eval_rewards/rejected": -1.6792069673538208, + "eval_runtime": 197.3734, + "eval_samples_per_second": 10.133, + "eval_steps_per_second": 5.067, + "step": 3940 + }, + { + "epoch": 0.52, + "learning_rate": 2.7883413549696396e-06, + "logits/chosen": -2.589012622833252, + "logits/rejected": -2.5272421836853027, + "logps/chosen": -488.56494140625, + "logps/rejected": -494.85833740234375, + "loss": 0.538, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.3187625408172607, + "rewards/margins": 0.47562170028686523, + "rewards/rejected": -1.7943843603134155, + "step": 3950 + }, + { + "epoch": 0.52, + "eval_logits/chosen": -2.3566412925720215, + "eval_logits/rejected": -2.3713998794555664, + "eval_logps/chosen": -467.7171630859375, + "eval_logps/rejected": -473.5096130371094, + "eval_loss": 0.612465500831604, + "eval_rewards/accuracies": 0.6694999933242798, + "eval_rewards/chosen": -1.3496776819229126, + "eval_rewards/margins": 0.37431854009628296, + "eval_rewards/rejected": -1.7239962816238403, + "eval_runtime": 196.8504, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 3950 + }, + { + "epoch": 0.52, + "learning_rate": 2.776992300123732e-06, + "logits/chosen": -2.451707124710083, + "logits/rejected": -2.446232318878174, + "logps/chosen": -421.21923828125, + "logps/rejected": -454.6561584472656, + "loss": 0.6141, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.2354681491851807, + "rewards/margins": 0.4316517412662506, + "rewards/rejected": -1.6671197414398193, + "step": 3960 + }, + { + "epoch": 0.52, + "eval_logits/chosen": -2.3530004024505615, + "eval_logits/rejected": -2.3678534030914307, + "eval_logps/chosen": -468.481201171875, + "eval_logps/rejected": -474.32135009765625, + "eval_loss": 0.6124312877655029, + "eval_rewards/accuracies": 0.6690000295639038, + "eval_rewards/chosen": -1.3573178052902222, + "eval_rewards/margins": 0.3747956454753876, + "eval_rewards/rejected": -1.7321133613586426, + "eval_runtime": 196.8704, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.079, + "step": 3960 + }, + { + "epoch": 0.52, + "learning_rate": 2.7656374630533113e-06, + "logits/chosen": -2.5897960662841797, + "logits/rejected": -2.5861315727233887, + "logps/chosen": -422.38079833984375, + "logps/rejected": -462.06964111328125, + "loss": 0.5655, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -1.283445119857788, + "rewards/margins": 0.44523996114730835, + "rewards/rejected": -1.7286850214004517, + "step": 3970 + }, + { + "epoch": 0.52, + "eval_logits/chosen": -2.3446178436279297, + "eval_logits/rejected": -2.359534740447998, + "eval_logps/chosen": -471.0897521972656, + "eval_logps/rejected": -477.40899658203125, + "eval_loss": 0.6137044429779053, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -1.3834036588668823, + "eval_rewards/margins": 0.37958598136901855, + "eval_rewards/rejected": -1.7629896402359009, + "eval_runtime": 196.7457, + "eval_samples_per_second": 10.165, + "eval_steps_per_second": 5.083, + "step": 3970 + }, + { + "epoch": 0.52, + "learning_rate": 2.754277080791021e-06, + "logits/chosen": -2.482008457183838, + "logits/rejected": -2.4874167442321777, + "logps/chosen": -466.5902404785156, + "logps/rejected": -471.9017639160156, + "loss": 0.7222, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -1.412452220916748, + "rewards/margins": 0.17661504447460175, + "rewards/rejected": -1.5890672206878662, + "step": 3980 + }, + { + "epoch": 0.52, + "eval_logits/chosen": -2.3433659076690674, + "eval_logits/rejected": -2.3585433959960938, + "eval_logps/chosen": -469.8363037109375, + "eval_logps/rejected": -476.04425048828125, + "eval_loss": 0.6140997409820557, + "eval_rewards/accuracies": 0.6644999980926514, + "eval_rewards/chosen": -1.3708688020706177, + "eval_rewards/margins": 0.37847331166267395, + "eval_rewards/rejected": -1.7493420839309692, + "eval_runtime": 196.843, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 3980 + }, + { + "epoch": 0.52, + "learning_rate": 2.742911390485262e-06, + "logits/chosen": -2.4135918617248535, + "logits/rejected": -2.4417901039123535, + "logps/chosen": -402.02264404296875, + "logps/rejected": -404.3270263671875, + "loss": 0.6808, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.3463059663772583, + "rewards/margins": 0.1900065392255783, + "rewards/rejected": -1.536312460899353, + "step": 3990 + }, + { + "epoch": 0.52, + "eval_logits/chosen": -2.34938645362854, + "eval_logits/rejected": -2.364652395248413, + "eval_logps/chosen": -467.2671203613281, + "eval_logps/rejected": -472.96368408203125, + "eval_loss": 0.6125109195709229, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -1.345177173614502, + "eval_rewards/margins": 0.3733597993850708, + "eval_rewards/rejected": -1.7185369729995728, + "eval_runtime": 196.8367, + "eval_samples_per_second": 10.161, + "eval_steps_per_second": 5.08, + "step": 3990 + }, + { + "epoch": 0.52, + "learning_rate": 2.731540629395239e-06, + "logits/chosen": -2.462125778198242, + "logits/rejected": -2.4748053550720215, + "logps/chosen": -467.29669189453125, + "logps/rejected": -465.6487731933594, + "loss": 0.6083, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.2891066074371338, + "rewards/margins": 0.29626819491386414, + "rewards/rejected": -1.5853749513626099, + "step": 4000 + }, + { + "epoch": 0.52, + "eval_logits/chosen": -2.34723162651062, + "eval_logits/rejected": -2.3619539737701416, + "eval_logps/chosen": -473.87408447265625, + "eval_logps/rejected": -480.2319641113281, + "eval_loss": 0.6121568083763123, + "eval_rewards/accuracies": 0.6660000085830688, + "eval_rewards/chosen": -1.4112465381622314, + "eval_rewards/margins": 0.3799728453159332, + "eval_rewards/rejected": -1.7912193536758423, + "eval_runtime": 196.9069, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 4000 + }, + { + "epoch": 0.52, + "learning_rate": 2.7201650348860115e-06, + "logits/chosen": -2.5356340408325195, + "logits/rejected": -2.571254014968872, + "logps/chosen": -432.988525390625, + "logps/rejected": -411.39306640625, + "loss": 0.5894, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.3545644283294678, + "rewards/margins": 0.3995421826839447, + "rewards/rejected": -1.7541065216064453, + "step": 4010 + }, + { + "epoch": 0.52, + "eval_logits/chosen": -2.3608150482177734, + "eval_logits/rejected": -2.3753154277801514, + "eval_logps/chosen": -475.13519287109375, + "eval_logps/rejected": -481.7857360839844, + "eval_loss": 0.6117491126060486, + "eval_rewards/accuracies": 0.6690000295639038, + "eval_rewards/chosen": -1.4238581657409668, + "eval_rewards/margins": 0.3828992545604706, + "eval_rewards/rejected": -1.8067574501037598, + "eval_runtime": 196.9545, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 4010 + }, + { + "epoch": 0.53, + "learning_rate": 2.7087848444235354e-06, + "logits/chosen": -2.5912222862243652, + "logits/rejected": -2.531287431716919, + "logps/chosen": -489.0033264160156, + "logps/rejected": -509.7576599121094, + "loss": 0.5505, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.4281346797943115, + "rewards/margins": 0.5264540910720825, + "rewards/rejected": -1.9545888900756836, + "step": 4020 + }, + { + "epoch": 0.53, + "eval_logits/chosen": -2.37618350982666, + "eval_logits/rejected": -2.390709638595581, + "eval_logps/chosen": -469.84698486328125, + "eval_logps/rejected": -475.90283203125, + "eval_loss": 0.6106529831886292, + "eval_rewards/accuracies": 0.6685000061988831, + "eval_rewards/chosen": -1.3709757328033447, + "eval_rewards/margins": 0.376952588558197, + "eval_rewards/rejected": -1.747928500175476, + "eval_runtime": 196.9284, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 4020 + }, + { + "epoch": 0.53, + "learning_rate": 2.697400295569707e-06, + "logits/chosen": -2.601231575012207, + "logits/rejected": -2.6253762245178223, + "logps/chosen": -414.8094177246094, + "logps/rejected": -472.2815856933594, + "loss": 0.5603, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.2042758464813232, + "rewards/margins": 0.5171712040901184, + "rewards/rejected": -1.7214473485946655, + "step": 4030 + }, + { + "epoch": 0.53, + "eval_logits/chosen": -2.3801462650299072, + "eval_logits/rejected": -2.394869565963745, + "eval_logps/chosen": -464.7630920410156, + "eval_logps/rejected": -470.367919921875, + "eval_loss": 0.6102996468544006, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -1.3201372623443604, + "eval_rewards/margins": 0.3724416494369507, + "eval_rewards/rejected": -1.692578911781311, + "eval_runtime": 196.9835, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 4030 + }, + { + "epoch": 0.53, + "learning_rate": 2.6860116259774065e-06, + "logits/chosen": -2.525394916534424, + "logits/rejected": -2.496546983718872, + "logps/chosen": -484.4578552246094, + "logps/rejected": -508.57562255859375, + "loss": 0.5443, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.2538907527923584, + "rewards/margins": 0.5189865827560425, + "rewards/rejected": -1.7728774547576904, + "step": 4040 + }, + { + "epoch": 0.53, + "eval_logits/chosen": -2.37362003326416, + "eval_logits/rejected": -2.3889076709747314, + "eval_logps/chosen": -463.3284912109375, + "eval_logps/rejected": -468.995361328125, + "eval_loss": 0.6113187074661255, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -1.3057914972305298, + "eval_rewards/margins": 0.37306222319602966, + "eval_rewards/rejected": -1.6788537502288818, + "eval_runtime": 197.1492, + "eval_samples_per_second": 10.145, + "eval_steps_per_second": 5.072, + "step": 4040 + }, + { + "epoch": 0.53, + "learning_rate": 2.674619073385531e-06, + "logits/chosen": -2.4929561614990234, + "logits/rejected": -2.495772361755371, + "logps/chosen": -421.23785400390625, + "logps/rejected": -454.38140869140625, + "loss": 0.602, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.2588123083114624, + "rewards/margins": 0.44167566299438477, + "rewards/rejected": -1.7004880905151367, + "step": 4050 + }, + { + "epoch": 0.53, + "eval_logits/chosen": -2.362971544265747, + "eval_logits/rejected": -2.37823748588562, + "eval_logps/chosen": -467.1461181640625, + "eval_logps/rejected": -473.2948913574219, + "eval_loss": 0.6117571592330933, + "eval_rewards/accuracies": 0.6650000214576721, + "eval_rewards/chosen": -1.343967080116272, + "eval_rewards/margins": 0.37788188457489014, + "eval_rewards/rejected": -1.721848964691162, + "eval_runtime": 197.0418, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 4050 + }, + { + "epoch": 0.53, + "learning_rate": 2.663222875614038e-06, + "logits/chosen": -2.5204296112060547, + "logits/rejected": -2.4171836376190186, + "logps/chosen": -450.54150390625, + "logps/rejected": -466.87615966796875, + "loss": 0.6865, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.4656872749328613, + "rewards/margins": 0.18536174297332764, + "rewards/rejected": -1.6510488986968994, + "step": 4060 + }, + { + "epoch": 0.53, + "eval_logits/chosen": -2.3661386966705322, + "eval_logits/rejected": -2.3818247318267822, + "eval_logps/chosen": -461.9250793457031, + "eval_logps/rejected": -467.6927490234375, + "eval_loss": 0.6116368174552917, + "eval_rewards/accuracies": 0.6629999876022339, + "eval_rewards/chosen": -1.291756510734558, + "eval_rewards/margins": 0.3740708827972412, + "eval_rewards/rejected": -1.6658276319503784, + "eval_runtime": 197.0338, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 4060 + }, + { + "epoch": 0.53, + "learning_rate": 2.6518232705589775e-06, + "logits/chosen": -2.5525612831115723, + "logits/rejected": -2.538083553314209, + "logps/chosen": -455.64080810546875, + "logps/rejected": -495.1800231933594, + "loss": 0.5712, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.2466583251953125, + "rewards/margins": 0.4873170852661133, + "rewards/rejected": -1.7339754104614258, + "step": 4070 + }, + { + "epoch": 0.53, + "eval_logits/chosen": -2.3656821250915527, + "eval_logits/rejected": -2.3814144134521484, + "eval_logps/chosen": -461.1421813964844, + "eval_logps/rejected": -467.19329833984375, + "eval_loss": 0.6121630072593689, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -1.2839277982711792, + "eval_rewards/margins": 0.37690529227256775, + "eval_rewards/rejected": -1.6608332395553589, + "eval_runtime": 196.9705, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 4070 + }, + { + "epoch": 0.53, + "learning_rate": 2.640420496187528e-06, + "logits/chosen": -2.457648754119873, + "logits/rejected": -2.4747841358184814, + "logps/chosen": -490.0325622558594, + "logps/rejected": -483.89453125, + "loss": 0.5086, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -1.2399706840515137, + "rewards/margins": 0.6194084882736206, + "rewards/rejected": -1.8593791723251343, + "step": 4080 + }, + { + "epoch": 0.53, + "eval_logits/chosen": -2.368699073791504, + "eval_logits/rejected": -2.3840346336364746, + "eval_logps/chosen": -463.33380126953125, + "eval_logps/rejected": -469.78082275390625, + "eval_loss": 0.6119689345359802, + "eval_rewards/accuracies": 0.6635000109672546, + "eval_rewards/chosen": -1.3058441877365112, + "eval_rewards/margins": 0.3808634877204895, + "eval_rewards/rejected": -1.686707854270935, + "eval_runtime": 196.7765, + "eval_samples_per_second": 10.164, + "eval_steps_per_second": 5.082, + "step": 4080 + }, + { + "epoch": 0.54, + "learning_rate": 2.629014790533025e-06, + "logits/chosen": -2.52437424659729, + "logits/rejected": -2.452230930328369, + "logps/chosen": -495.469482421875, + "logps/rejected": -457.87713623046875, + "loss": 0.6036, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.306132197380066, + "rewards/margins": 0.4579140543937683, + "rewards/rejected": -1.764046311378479, + "step": 4090 + }, + { + "epoch": 0.54, + "eval_logits/chosen": -2.366572380065918, + "eval_logits/rejected": -2.381913185119629, + "eval_logps/chosen": -467.9704284667969, + "eval_logps/rejected": -475.1983642578125, + "eval_loss": 0.6131882071495056, + "eval_rewards/accuracies": 0.6669999957084656, + "eval_rewards/chosen": -1.3522101640701294, + "eval_rewards/margins": 0.3886730372905731, + "eval_rewards/rejected": -1.7408833503723145, + "eval_runtime": 196.9962, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 4090 + }, + { + "epoch": 0.54, + "learning_rate": 2.617606391689996e-06, + "logits/chosen": -2.5924911499023438, + "logits/rejected": -2.550729274749756, + "logps/chosen": -465.5814514160156, + "logps/rejected": -473.2737731933594, + "loss": 0.6175, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.2862884998321533, + "rewards/margins": 0.428173303604126, + "rewards/rejected": -1.7144616842269897, + "step": 4100 + }, + { + "epoch": 0.54, + "eval_logits/chosen": -2.3732011318206787, + "eval_logits/rejected": -2.38840651512146, + "eval_logps/chosen": -468.1484069824219, + "eval_logps/rejected": -475.3802490234375, + "eval_loss": 0.6129105091094971, + "eval_rewards/accuracies": 0.6629999876022339, + "eval_rewards/chosen": -1.3539899587631226, + "eval_rewards/margins": 0.3887125849723816, + "eval_rewards/rejected": -1.7427024841308594, + "eval_runtime": 196.9234, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 4100 + }, + { + "epoch": 0.54, + "learning_rate": 2.6061955378091896e-06, + "logits/chosen": -2.5106284618377686, + "logits/rejected": -2.460104465484619, + "logps/chosen": -426.4384765625, + "logps/rejected": -476.866455078125, + "loss": 0.5335, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.2528026103973389, + "rewards/margins": 0.600531816482544, + "rewards/rejected": -1.8533344268798828, + "step": 4110 + }, + { + "epoch": 0.54, + "eval_logits/chosen": -2.374972343444824, + "eval_logits/rejected": -2.39029598236084, + "eval_logps/chosen": -465.0861511230469, + "eval_logps/rejected": -472.0409851074219, + "eval_loss": 0.612612247467041, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -1.3233674764633179, + "eval_rewards/margins": 0.38594210147857666, + "eval_rewards/rejected": -1.709309697151184, + "eval_runtime": 196.8488, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 4110 + }, + { + "epoch": 0.54, + "learning_rate": 2.5947824670926025e-06, + "logits/chosen": -2.5935683250427246, + "logits/rejected": -2.5762457847595215, + "logps/chosen": -423.15423583984375, + "logps/rejected": -490.165771484375, + "loss": 0.5439, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.1569029092788696, + "rewards/margins": 0.5699300765991211, + "rewards/rejected": -1.7268329858779907, + "step": 4120 + }, + { + "epoch": 0.54, + "eval_logits/chosen": -2.3668456077575684, + "eval_logits/rejected": -2.3826231956481934, + "eval_logps/chosen": -457.88714599609375, + "eval_logps/rejected": -464.2001953125, + "eval_loss": 0.6133199334144592, + "eval_rewards/accuracies": 0.6660000085830688, + "eval_rewards/chosen": -1.251376986503601, + "eval_rewards/margins": 0.3795250356197357, + "eval_rewards/rejected": -1.6309019327163696, + "eval_runtime": 197.1601, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 4120 + }, + { + "epoch": 0.54, + "learning_rate": 2.583367417788508e-06, + "logits/chosen": -2.451611042022705, + "logits/rejected": -2.436627149581909, + "logps/chosen": -442.36431884765625, + "logps/rejected": -469.06048583984375, + "loss": 0.5798, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.2912431955337524, + "rewards/margins": 0.5064207315444946, + "rewards/rejected": -1.797663927078247, + "step": 4130 + }, + { + "epoch": 0.54, + "eval_logits/chosen": -2.359987258911133, + "eval_logits/rejected": -2.375581979751587, + "eval_logps/chosen": -456.96063232421875, + "eval_logps/rejected": -463.28228759765625, + "eval_loss": 0.6134931445121765, + "eval_rewards/accuracies": 0.6650000214576721, + "eval_rewards/chosen": -1.2421122789382935, + "eval_rewards/margins": 0.3796096742153168, + "eval_rewards/rejected": -1.6217222213745117, + "eval_runtime": 196.9855, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 4130 + }, + { + "epoch": 0.54, + "learning_rate": 2.5719506281864838e-06, + "logits/chosen": -2.603020191192627, + "logits/rejected": -2.580487012863159, + "logps/chosen": -469.97601318359375, + "logps/rejected": -435.74835205078125, + "loss": 0.5875, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.1624748706817627, + "rewards/margins": 0.44589272141456604, + "rewards/rejected": -1.608367681503296, + "step": 4140 + }, + { + "epoch": 0.54, + "eval_logits/chosen": -2.3628158569335938, + "eval_logits/rejected": -2.3783905506134033, + "eval_logps/chosen": -457.54718017578125, + "eval_logps/rejected": -464.145263671875, + "eval_loss": 0.6132175922393799, + "eval_rewards/accuracies": 0.6679999828338623, + "eval_rewards/chosen": -1.247977614402771, + "eval_rewards/margins": 0.3823748826980591, + "eval_rewards/rejected": -1.6303523778915405, + "eval_runtime": 196.9295, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 4140 + }, + { + "epoch": 0.54, + "learning_rate": 2.5605323366124335e-06, + "logits/chosen": -2.4823946952819824, + "logits/rejected": -2.399623394012451, + "logps/chosen": -442.94219970703125, + "logps/rejected": -465.85443115234375, + "loss": 0.6093, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.2800296545028687, + "rewards/margins": 0.39251285791397095, + "rewards/rejected": -1.6725425720214844, + "step": 4150 + }, + { + "epoch": 0.54, + "eval_logits/chosen": -2.375561475753784, + "eval_logits/rejected": -2.3912646770477295, + "eval_logps/chosen": -453.0314636230469, + "eval_logps/rejected": -459.216064453125, + "eval_loss": 0.6121273636817932, + "eval_rewards/accuracies": 0.6685000061988831, + "eval_rewards/chosen": -1.2028201818466187, + "eval_rewards/margins": 0.3782404065132141, + "eval_rewards/rejected": -1.581060528755188, + "eval_runtime": 197.1211, + "eval_samples_per_second": 10.146, + "eval_steps_per_second": 5.073, + "step": 4150 + }, + { + "epoch": 0.54, + "learning_rate": 2.5491127814236172e-06, + "logits/chosen": -2.570061445236206, + "logits/rejected": -2.5789883136749268, + "logps/chosen": -378.7374572753906, + "logps/rejected": -458.75421142578125, + "loss": 0.6094, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.094857931137085, + "rewards/margins": 0.36024436354637146, + "rewards/rejected": -1.4551023244857788, + "step": 4160 + }, + { + "epoch": 0.54, + "eval_logits/chosen": -2.3745861053466797, + "eval_logits/rejected": -2.3902618885040283, + "eval_logps/chosen": -452.4034423828125, + "eval_logps/rejected": -458.7520751953125, + "eval_loss": 0.6126303672790527, + "eval_rewards/accuracies": 0.6675000190734863, + "eval_rewards/chosen": -1.1965399980545044, + "eval_rewards/margins": 0.3798801302909851, + "eval_rewards/rejected": -1.5764203071594238, + "eval_runtime": 197.2083, + "eval_samples_per_second": 10.142, + "eval_steps_per_second": 5.071, + "step": 4160 + }, + { + "epoch": 0.55, + "learning_rate": 2.537692201003671e-06, + "logits/chosen": -2.538421869277954, + "logits/rejected": -2.5713725090026855, + "logps/chosen": -450.49005126953125, + "logps/rejected": -484.93487548828125, + "loss": 0.5578, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.2483497858047485, + "rewards/margins": 0.5216721296310425, + "rewards/rejected": -1.7700217962265015, + "step": 4170 + }, + { + "epoch": 0.55, + "eval_logits/chosen": -2.3649206161499023, + "eval_logits/rejected": -2.3805949687957764, + "eval_logps/chosen": -451.88079833984375, + "eval_logps/rejected": -458.26397705078125, + "eval_loss": 0.6134587526321411, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -1.1913139820098877, + "eval_rewards/margins": 0.3802258372306824, + "eval_rewards/rejected": -1.5715397596359253, + "eval_runtime": 196.9874, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.076, + "step": 4170 + }, + { + "epoch": 0.55, + "learning_rate": 2.526270833757635e-06, + "logits/chosen": -2.5782477855682373, + "logits/rejected": -2.5254034996032715, + "logps/chosen": -440.1346130371094, + "logps/rejected": -454.3451232910156, + "loss": 0.5732, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.1584813594818115, + "rewards/margins": 0.5007287859916687, + "rewards/rejected": -1.659210205078125, + "step": 4180 + }, + { + "epoch": 0.55, + "eval_logits/chosen": -2.3597798347473145, + "eval_logits/rejected": -2.3748152256011963, + "eval_logps/chosen": -454.84210205078125, + "eval_logps/rejected": -461.6698913574219, + "eval_loss": 0.6135467290878296, + "eval_rewards/accuracies": 0.6635000109672546, + "eval_rewards/chosen": -1.2209270000457764, + "eval_rewards/margins": 0.38467180728912354, + "eval_rewards/rejected": -1.6055988073349, + "eval_runtime": 196.8673, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.08, + "step": 4180 + }, + { + "epoch": 0.55, + "learning_rate": 2.514848918106971e-06, + "logits/chosen": -2.5071187019348145, + "logits/rejected": -2.4454050064086914, + "logps/chosen": -454.74652099609375, + "logps/rejected": -439.2115173339844, + "loss": 0.6302, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.3234502077102661, + "rewards/margins": 0.3678116202354431, + "rewards/rejected": -1.691261649131775, + "step": 4190 + }, + { + "epoch": 0.55, + "eval_logits/chosen": -2.3559532165527344, + "eval_logits/rejected": -2.3702216148376465, + "eval_logps/chosen": -456.7697448730469, + "eval_logps/rejected": -463.5509948730469, + "eval_loss": 0.6118788719177246, + "eval_rewards/accuracies": 0.6675000190734863, + "eval_rewards/chosen": -1.2402034997940063, + "eval_rewards/margins": 0.3842066526412964, + "eval_rewards/rejected": -1.6244101524353027, + "eval_runtime": 196.8886, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 4190 + }, + { + "epoch": 0.55, + "learning_rate": 2.503426692484594e-06, + "logits/chosen": -2.5244762897491455, + "logits/rejected": -2.511427402496338, + "logps/chosen": -434.56427001953125, + "logps/rejected": -478.21044921875, + "loss": 0.5961, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.1945250034332275, + "rewards/margins": 0.3992057740688324, + "rewards/rejected": -1.5937308073043823, + "step": 4200 + }, + { + "epoch": 0.55, + "eval_logits/chosen": -2.3472087383270264, + "eval_logits/rejected": -2.361002206802368, + "eval_logps/chosen": -462.36651611328125, + "eval_logps/rejected": -469.5614929199219, + "eval_loss": 0.6113600134849548, + "eval_rewards/accuracies": 0.6644999980926514, + "eval_rewards/chosen": -1.2961714267730713, + "eval_rewards/margins": 0.38834336400032043, + "eval_rewards/rejected": -1.6845147609710693, + "eval_runtime": 196.9479, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 4200 + }, + { + "epoch": 0.55, + "learning_rate": 2.492004395329883e-06, + "logits/chosen": -2.5484352111816406, + "logits/rejected": -2.530270576477051, + "logps/chosen": -436.87493896484375, + "logps/rejected": -444.59100341796875, + "loss": 0.5818, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.1828187704086304, + "rewards/margins": 0.43469303846359253, + "rewards/rejected": -1.6175119876861572, + "step": 4210 + }, + { + "epoch": 0.55, + "eval_logits/chosen": -2.3477280139923096, + "eval_logits/rejected": -2.3614227771759033, + "eval_logps/chosen": -463.8641357421875, + "eval_logps/rejected": -471.26556396484375, + "eval_loss": 0.6109665632247925, + "eval_rewards/accuracies": 0.6660000085830688, + "eval_rewards/chosen": -1.3111472129821777, + "eval_rewards/margins": 0.39040789008140564, + "eval_rewards/rejected": -1.7015551328659058, + "eval_runtime": 196.7938, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 5.081, + "step": 4210 + }, + { + "epoch": 0.55, + "learning_rate": 2.4805822650837165e-06, + "logits/chosen": -2.426492929458618, + "logits/rejected": -2.454468011856079, + "logps/chosen": -422.4960021972656, + "logps/rejected": -492.2879333496094, + "loss": 0.5239, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.2959611415863037, + "rewards/margins": 0.6673166751861572, + "rewards/rejected": -1.963277816772461, + "step": 4220 + }, + { + "epoch": 0.55, + "eval_logits/chosen": -2.3370590209960938, + "eval_logits/rejected": -2.3499491214752197, + "eval_logps/chosen": -471.30816650390625, + "eval_logps/rejected": -479.48760986328125, + "eval_loss": 0.6105585694313049, + "eval_rewards/accuracies": 0.6650000214576721, + "eval_rewards/chosen": -1.3855873346328735, + "eval_rewards/margins": 0.3981887698173523, + "eval_rewards/rejected": -1.7837762832641602, + "eval_runtime": 196.8749, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.079, + "step": 4220 + }, + { + "epoch": 0.55, + "learning_rate": 2.4691605401834843e-06, + "logits/chosen": -2.6059975624084473, + "logits/rejected": -2.5732944011688232, + "logps/chosen": -486.0270080566406, + "logps/rejected": -500.5328063964844, + "loss": 0.6414, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -1.333966851234436, + "rewards/margins": 0.28036683797836304, + "rewards/rejected": -1.6143337488174438, + "step": 4230 + }, + { + "epoch": 0.55, + "eval_logits/chosen": -2.332699775695801, + "eval_logits/rejected": -2.3459360599517822, + "eval_logps/chosen": -468.0067443847656, + "eval_logps/rejected": -475.77203369140625, + "eval_loss": 0.6105542778968811, + "eval_rewards/accuracies": 0.6625000238418579, + "eval_rewards/chosen": -1.3525731563568115, + "eval_rewards/margins": 0.3940469026565552, + "eval_rewards/rejected": -1.7466199398040771, + "eval_runtime": 196.7934, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 5.081, + "step": 4230 + }, + { + "epoch": 0.55, + "learning_rate": 2.457739459058117e-06, + "logits/chosen": -2.6030757427215576, + "logits/rejected": -2.584155559539795, + "logps/chosen": -513.5277099609375, + "logps/rejected": -507.9750061035156, + "loss": 0.5823, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.2995240688323975, + "rewards/margins": 0.44727516174316406, + "rewards/rejected": -1.746799111366272, + "step": 4240 + }, + { + "epoch": 0.55, + "eval_logits/chosen": -2.3276073932647705, + "eval_logits/rejected": -2.3409342765808105, + "eval_logps/chosen": -466.8489990234375, + "eval_logps/rejected": -474.26348876953125, + "eval_loss": 0.6102898716926575, + "eval_rewards/accuracies": 0.6664999723434448, + "eval_rewards/chosen": -1.3409960269927979, + "eval_rewards/margins": 0.3905387222766876, + "eval_rewards/rejected": -1.7315348386764526, + "eval_runtime": 196.9447, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.078, + "step": 4240 + }, + { + "epoch": 0.56, + "learning_rate": 2.4463192601231054e-06, + "logits/chosen": -2.527188539505005, + "logits/rejected": -2.4350686073303223, + "logps/chosen": -512.815673828125, + "logps/rejected": -483.7102966308594, + "loss": 0.5697, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.3667986392974854, + "rewards/margins": 0.5209914445877075, + "rewards/rejected": -1.8877900838851929, + "step": 4250 + }, + { + "epoch": 0.56, + "eval_logits/chosen": -2.327854633331299, + "eval_logits/rejected": -2.3410706520080566, + "eval_logps/chosen": -465.3106689453125, + "eval_logps/rejected": -472.47796630859375, + "eval_loss": 0.6097335815429688, + "eval_rewards/accuracies": 0.6629999876022339, + "eval_rewards/chosen": -1.3256126642227173, + "eval_rewards/margins": 0.3880668580532074, + "eval_rewards/rejected": -1.713679313659668, + "eval_runtime": 197.0119, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 4250 + }, + { + "epoch": 0.56, + "learning_rate": 2.434900181775524e-06, + "logits/chosen": -2.5026462078094482, + "logits/rejected": -2.5014119148254395, + "logps/chosen": -471.37548828125, + "logps/rejected": -479.77020263671875, + "loss": 0.6178, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.356858491897583, + "rewards/margins": 0.3824175000190735, + "rewards/rejected": -1.7392759323120117, + "step": 4260 + }, + { + "epoch": 0.56, + "eval_logits/chosen": -2.336132049560547, + "eval_logits/rejected": -2.3492181301116943, + "eval_logps/chosen": -464.2100524902344, + "eval_logps/rejected": -471.1261901855469, + "eval_loss": 0.6091320514678955, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -1.3146066665649414, + "eval_rewards/margins": 0.38555505871772766, + "eval_rewards/rejected": -1.7001614570617676, + "eval_runtime": 196.757, + "eval_samples_per_second": 10.165, + "eval_steps_per_second": 5.082, + "step": 4260 + }, + { + "epoch": 0.56, + "learning_rate": 2.4234824623890578e-06, + "logits/chosen": -2.617096424102783, + "logits/rejected": -2.5573208332061768, + "logps/chosen": -455.67352294921875, + "logps/rejected": -475.95867919921875, + "loss": 0.5538, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.3118655681610107, + "rewards/margins": 0.5087541341781616, + "rewards/rejected": -1.820619821548462, + "step": 4270 + }, + { + "epoch": 0.56, + "eval_logits/chosen": -2.335390329360962, + "eval_logits/rejected": -2.348327398300171, + "eval_logps/chosen": -464.7738952636719, + "eval_logps/rejected": -471.7409973144531, + "eval_loss": 0.6090496778488159, + "eval_rewards/accuracies": 0.6644999980926514, + "eval_rewards/chosen": -1.3202449083328247, + "eval_rewards/margins": 0.38606494665145874, + "eval_rewards/rejected": -1.7063097953796387, + "eval_runtime": 196.9399, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.078, + "step": 4270 + }, + { + "epoch": 0.56, + "learning_rate": 2.4120663403090193e-06, + "logits/chosen": -2.5204405784606934, + "logits/rejected": -2.515784502029419, + "logps/chosen": -462.69903564453125, + "logps/rejected": -501.7197265625, + "loss": 0.5863, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.3065764904022217, + "rewards/margins": 0.42397910356521606, + "rewards/rejected": -1.730555772781372, + "step": 4280 + }, + { + "epoch": 0.56, + "eval_logits/chosen": -2.332894802093506, + "eval_logits/rejected": -2.3454771041870117, + "eval_logps/chosen": -468.6414489746094, + "eval_logps/rejected": -476.00213623046875, + "eval_loss": 0.6092647910118103, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -1.3589202165603638, + "eval_rewards/margins": 0.39000067114830017, + "eval_rewards/rejected": -1.7489211559295654, + "eval_runtime": 197.027, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 4280 + }, + { + "epoch": 0.56, + "learning_rate": 2.40065205384738e-06, + "logits/chosen": -2.482933282852173, + "logits/rejected": -2.405017852783203, + "logps/chosen": -444.735107421875, + "logps/rejected": -423.2681579589844, + "loss": 0.7136, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -1.447666049003601, + "rewards/margins": 0.18110871315002441, + "rewards/rejected": -1.628774881362915, + "step": 4290 + }, + { + "epoch": 0.56, + "eval_logits/chosen": -2.337977647781372, + "eval_logits/rejected": -2.350689172744751, + "eval_logps/chosen": -465.7172546386719, + "eval_logps/rejected": -472.37890625, + "eval_loss": 0.6085383296012878, + "eval_rewards/accuracies": 0.6650000214576721, + "eval_rewards/chosen": -1.3296782970428467, + "eval_rewards/margins": 0.38301026821136475, + "eval_rewards/rejected": -1.712688684463501, + "eval_runtime": 197.1026, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.073, + "step": 4290 + }, + { + "epoch": 0.56, + "learning_rate": 2.389239841277793e-06, + "logits/chosen": -2.367617130279541, + "logits/rejected": -2.3953096866607666, + "logps/chosen": -449.0538024902344, + "logps/rejected": -443.99176025390625, + "loss": 0.5972, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.3654316663742065, + "rewards/margins": 0.3840712308883667, + "rewards/rejected": -1.7495027780532837, + "step": 4300 + }, + { + "epoch": 0.56, + "eval_logits/chosen": -2.348155975341797, + "eval_logits/rejected": -2.3609445095062256, + "eval_logps/chosen": -463.5664367675781, + "eval_logps/rejected": -469.9287109375, + "eval_loss": 0.6079076528549194, + "eval_rewards/accuracies": 0.6644999980926514, + "eval_rewards/chosen": -1.3081703186035156, + "eval_rewards/margins": 0.3800167143344879, + "eval_rewards/rejected": -1.6881871223449707, + "eval_runtime": 196.9503, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 4300 + }, + { + "epoch": 0.56, + "learning_rate": 2.3778299408306167e-06, + "logits/chosen": -2.5109307765960693, + "logits/rejected": -2.4798407554626465, + "logps/chosen": -425.0166931152344, + "logps/rejected": -450.65692138671875, + "loss": 0.5835, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.210313081741333, + "rewards/margins": 0.47474008798599243, + "rewards/rejected": -1.6850531101226807, + "step": 4310 + }, + { + "epoch": 0.56, + "eval_logits/chosen": -2.3537330627441406, + "eval_logits/rejected": -2.3664982318878174, + "eval_logps/chosen": -462.9638366699219, + "eval_logps/rejected": -469.1507873535156, + "eval_loss": 0.6074733734130859, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -1.3021445274353027, + "eval_rewards/margins": 0.37826311588287354, + "eval_rewards/rejected": -1.6804077625274658, + "eval_runtime": 197.2587, + "eval_samples_per_second": 10.139, + "eval_steps_per_second": 5.069, + "step": 4310 + }, + { + "epoch": 0.57, + "learning_rate": 2.3664225906879452e-06, + "logits/chosen": -2.504697561264038, + "logits/rejected": -2.5029256343841553, + "logps/chosen": -428.6754455566406, + "logps/rejected": -426.82843017578125, + "loss": 0.6176, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.2406651973724365, + "rewards/margins": 0.34046998620033264, + "rewards/rejected": -1.5811351537704468, + "step": 4320 + }, + { + "epoch": 0.57, + "eval_logits/chosen": -2.3593673706054688, + "eval_logits/rejected": -2.3729419708251953, + "eval_logps/chosen": -456.5874328613281, + "eval_logps/rejected": -462.12481689453125, + "eval_loss": 0.607283353805542, + "eval_rewards/accuracies": 0.6610000133514404, + "eval_rewards/chosen": -1.2383801937103271, + "eval_rewards/margins": 0.3717676103115082, + "eval_rewards/rejected": -1.6101479530334473, + "eval_runtime": 197.0978, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.074, + "step": 4320 + }, + { + "epoch": 0.57, + "learning_rate": 2.3550180289786357e-06, + "logits/chosen": -2.5368552207946777, + "logits/rejected": -2.469285488128662, + "logps/chosen": -431.1910705566406, + "logps/rejected": -420.4564514160156, + "loss": 0.5657, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.0879895687103271, + "rewards/margins": 0.4566231369972229, + "rewards/rejected": -1.5446126461029053, + "step": 4330 + }, + { + "epoch": 0.57, + "eval_logits/chosen": -2.364882230758667, + "eval_logits/rejected": -2.378333806991577, + "eval_logps/chosen": -456.3202819824219, + "eval_logps/rejected": -461.741943359375, + "eval_loss": 0.6068199276924133, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -1.235708475112915, + "eval_rewards/margins": 0.3706108033657074, + "eval_rewards/rejected": -1.6063191890716553, + "eval_runtime": 197.2059, + "eval_samples_per_second": 10.142, + "eval_steps_per_second": 5.071, + "step": 4330 + }, + { + "epoch": 0.57, + "learning_rate": 2.343616493773335e-06, + "logits/chosen": -2.6210741996765137, + "logits/rejected": -2.5647199153900146, + "logps/chosen": -448.553466796875, + "logps/rejected": -487.02490234375, + "loss": 0.5632, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.2043299674987793, + "rewards/margins": 0.42456427216529846, + "rewards/rejected": -1.6288942098617554, + "step": 4340 + }, + { + "epoch": 0.57, + "eval_logits/chosen": -2.36385440826416, + "eval_logits/rejected": -2.3777124881744385, + "eval_logps/chosen": -456.9960021972656, + "eval_logps/rejected": -462.7846374511719, + "eval_loss": 0.6074703335762024, + "eval_rewards/accuracies": 0.6629999876022339, + "eval_rewards/chosen": -1.242465853691101, + "eval_rewards/margins": 0.37428027391433716, + "eval_rewards/rejected": -1.6167460680007935, + "eval_runtime": 196.9677, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 4340 + }, + { + "epoch": 0.57, + "learning_rate": 2.3322182230795127e-06, + "logits/chosen": -2.5477375984191895, + "logits/rejected": -2.5292723178863525, + "logps/chosen": -395.3967590332031, + "logps/rejected": -476.68109130859375, + "loss": 0.5542, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.1301119327545166, + "rewards/margins": 0.5130189657211304, + "rewards/rejected": -1.6431306600570679, + "step": 4350 + }, + { + "epoch": 0.57, + "eval_logits/chosen": -2.362542152404785, + "eval_logits/rejected": -2.3769373893737793, + "eval_logps/chosen": -454.8631896972656, + "eval_logps/rejected": -460.7909851074219, + "eval_loss": 0.6081883311271667, + "eval_rewards/accuracies": 0.6610000133514404, + "eval_rewards/chosen": -1.2211376428604126, + "eval_rewards/margins": 0.3756721317768097, + "eval_rewards/rejected": -1.5968098640441895, + "eval_runtime": 196.9785, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 4350 + }, + { + "epoch": 0.57, + "learning_rate": 2.320823454836491e-06, + "logits/chosen": -2.7069315910339355, + "logits/rejected": -2.598485231399536, + "logps/chosen": -436.9664001464844, + "logps/rejected": -443.3999938964844, + "loss": 0.5563, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.1292293071746826, + "rewards/margins": 0.44567570090293884, + "rewards/rejected": -1.5749050378799438, + "step": 4360 + }, + { + "epoch": 0.57, + "eval_logits/chosen": -2.362305164337158, + "eval_logits/rejected": -2.377182722091675, + "eval_logps/chosen": -453.9595031738281, + "eval_logps/rejected": -460.05413818359375, + "eval_loss": 0.6087186336517334, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -1.2121007442474365, + "eval_rewards/margins": 0.3773403763771057, + "eval_rewards/rejected": -1.589441180229187, + "eval_runtime": 197.1092, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.073, + "step": 4360 + }, + { + "epoch": 0.57, + "learning_rate": 2.309432426910478e-06, + "logits/chosen": -2.4575705528259277, + "logits/rejected": -2.4372870922088623, + "logps/chosen": -483.6983337402344, + "logps/rejected": -443.008056640625, + "loss": 0.6174, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.1192137002944946, + "rewards/margins": 0.37381118535995483, + "rewards/rejected": -1.4930248260498047, + "step": 4370 + }, + { + "epoch": 0.57, + "eval_logits/chosen": -2.3570568561553955, + "eval_logits/rejected": -2.371819257736206, + "eval_logps/chosen": -455.49847412109375, + "eval_logps/rejected": -461.7596740722656, + "eval_loss": 0.6090093851089478, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -1.227491021156311, + "eval_rewards/margins": 0.379006028175354, + "eval_rewards/rejected": -1.606496810913086, + "eval_runtime": 197.0527, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 4370 + }, + { + "epoch": 0.57, + "learning_rate": 2.298045377089604e-06, + "logits/chosen": -2.5362887382507324, + "logits/rejected": -2.52489972114563, + "logps/chosen": -435.7310485839844, + "logps/rejected": -447.492919921875, + "loss": 0.5537, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.2247138023376465, + "rewards/margins": 0.4836392402648926, + "rewards/rejected": -1.708353042602539, + "step": 4380 + }, + { + "epoch": 0.57, + "eval_logits/chosen": -2.35113787651062, + "eval_logits/rejected": -2.365795612335205, + "eval_logps/chosen": -460.2951965332031, + "eval_logps/rejected": -467.2223815917969, + "eval_loss": 0.6088528037071228, + "eval_rewards/accuracies": 0.6629999876022339, + "eval_rewards/chosen": -1.2754576206207275, + "eval_rewards/margins": 0.3856658637523651, + "eval_rewards/rejected": -1.661123514175415, + "eval_runtime": 196.9929, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.076, + "step": 4380 + }, + { + "epoch": 0.57, + "learning_rate": 2.286662543078955e-06, + "logits/chosen": -2.4176924228668213, + "logits/rejected": -2.4342312812805176, + "logps/chosen": -475.22503662109375, + "logps/rejected": -464.350830078125, + "loss": 0.5696, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.2765443325042725, + "rewards/margins": 0.389670729637146, + "rewards/rejected": -1.666215181350708, + "step": 4390 + }, + { + "epoch": 0.57, + "eval_logits/chosen": -2.35123348236084, + "eval_logits/rejected": -2.3659682273864746, + "eval_logps/chosen": -462.67279052734375, + "eval_logps/rejected": -469.9460144042969, + "eval_loss": 0.6087071299552917, + "eval_rewards/accuracies": 0.6629999876022339, + "eval_rewards/chosen": -1.29923415184021, + "eval_rewards/margins": 0.38912561535835266, + "eval_rewards/rejected": -1.6883596181869507, + "eval_runtime": 196.9775, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 4390 + }, + { + "epoch": 0.58, + "learning_rate": 2.2752841624956125e-06, + "logits/chosen": -2.636507034301758, + "logits/rejected": -2.518415689468384, + "logps/chosen": -503.35247802734375, + "logps/rejected": -511.513427734375, + "loss": 0.6052, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.3775184154510498, + "rewards/margins": 0.47160688042640686, + "rewards/rejected": -1.8491252660751343, + "step": 4400 + }, + { + "epoch": 0.58, + "eval_logits/chosen": -2.35198974609375, + "eval_logits/rejected": -2.3669545650482178, + "eval_logps/chosen": -461.1800537109375, + "eval_logps/rejected": -468.2998352050781, + "eval_loss": 0.6087808012962341, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -1.2843064069747925, + "eval_rewards/margins": 0.3875918388366699, + "eval_rewards/rejected": -1.671898365020752, + "eval_runtime": 197.0059, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 4400 + }, + { + "epoch": 0.58, + "learning_rate": 2.2639104728636915e-06, + "logits/chosen": -2.5947508811950684, + "logits/rejected": -2.58724308013916, + "logps/chosen": -426.2372131347656, + "logps/rejected": -467.16937255859375, + "loss": 0.5886, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.1018826961517334, + "rewards/margins": 0.4221973419189453, + "rewards/rejected": -1.5240800380706787, + "step": 4410 + }, + { + "epoch": 0.58, + "eval_logits/chosen": -2.347571849822998, + "eval_logits/rejected": -2.3628687858581543, + "eval_logps/chosen": -457.931396484375, + "eval_logps/rejected": -464.91552734375, + "eval_loss": 0.6095851063728333, + "eval_rewards/accuracies": 0.6679999828338623, + "eval_rewards/chosen": -1.2518198490142822, + "eval_rewards/margins": 0.38623523712158203, + "eval_rewards/rejected": -1.6380552053451538, + "eval_runtime": 197.0362, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 4410 + }, + { + "epoch": 0.58, + "learning_rate": 2.252541711609384e-06, + "logits/chosen": -2.551729679107666, + "logits/rejected": -2.4922897815704346, + "logps/chosen": -436.5389099121094, + "logps/rejected": -428.7633361816406, + "loss": 0.586, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.1860450506210327, + "rewards/margins": 0.4158903956413269, + "rewards/rejected": -1.601935625076294, + "step": 4420 + }, + { + "epoch": 0.58, + "eval_logits/chosen": -2.346620559692383, + "eval_logits/rejected": -2.3620049953460693, + "eval_logps/chosen": -454.94219970703125, + "eval_logps/rejected": -461.5989074707031, + "eval_loss": 0.609160840511322, + "eval_rewards/accuracies": 0.6704999804496765, + "eval_rewards/chosen": -1.2219277620315552, + "eval_rewards/margins": 0.3829614222049713, + "eval_rewards/rejected": -1.604889154434204, + "eval_runtime": 197.0636, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 4420 + }, + { + "epoch": 0.58, + "learning_rate": 2.241178116056002e-06, + "logits/chosen": -2.5624594688415527, + "logits/rejected": -2.5428500175476074, + "logps/chosen": -426.37109375, + "logps/rejected": -437.63995361328125, + "loss": 0.5653, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.156144618988037, + "rewards/margins": 0.45035356283187866, + "rewards/rejected": -1.60649836063385, + "step": 4430 + }, + { + "epoch": 0.58, + "eval_logits/chosen": -2.349269390106201, + "eval_logits/rejected": -2.364637613296509, + "eval_logps/chosen": -454.94842529296875, + "eval_logps/rejected": -461.6927490234375, + "eval_loss": 0.6091820597648621, + "eval_rewards/accuracies": 0.6685000061988831, + "eval_rewards/chosen": -1.2219903469085693, + "eval_rewards/margins": 0.3838370144367218, + "eval_rewards/rejected": -1.6058274507522583, + "eval_runtime": 196.9566, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 4430 + }, + { + "epoch": 0.58, + "learning_rate": 2.2298199234190236e-06, + "logits/chosen": -2.4795172214508057, + "logits/rejected": -2.5077686309814453, + "logps/chosen": -477.9178771972656, + "logps/rejected": -481.4457092285156, + "loss": 0.5427, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.2306759357452393, + "rewards/margins": 0.5230705738067627, + "rewards/rejected": -1.7537466287612915, + "step": 4440 + }, + { + "epoch": 0.58, + "eval_logits/chosen": -2.3430752754211426, + "eval_logits/rejected": -2.3584113121032715, + "eval_logps/chosen": -461.8674011230469, + "eval_logps/rejected": -469.6636047363281, + "eval_loss": 0.6097197532653809, + "eval_rewards/accuracies": 0.6669999957084656, + "eval_rewards/chosen": -1.2911797761917114, + "eval_rewards/margins": 0.3943558931350708, + "eval_rewards/rejected": -1.6855357885360718, + "eval_runtime": 196.9233, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 4440 + }, + { + "epoch": 0.58, + "learning_rate": 2.218467370801138e-06, + "logits/chosen": -2.5464415550231934, + "logits/rejected": -2.5220420360565186, + "logps/chosen": -467.94561767578125, + "logps/rejected": -458.48199462890625, + "loss": 0.6427, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.3079677820205688, + "rewards/margins": 0.29971104860305786, + "rewards/rejected": -1.607678771018982, + "step": 4450 + }, + { + "epoch": 0.58, + "eval_logits/chosen": -2.350649833679199, + "eval_logits/rejected": -2.366107225418091, + "eval_logps/chosen": -462.7431945800781, + "eval_logps/rejected": -470.6502990722656, + "eval_loss": 0.6094748973846436, + "eval_rewards/accuracies": 0.6650000214576721, + "eval_rewards/chosen": -1.2999377250671387, + "eval_rewards/margins": 0.39546507596969604, + "eval_rewards/rejected": -1.695402979850769, + "eval_runtime": 196.8685, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.08, + "step": 4450 + }, + { + "epoch": 0.58, + "learning_rate": 2.207120695187304e-06, + "logits/chosen": -2.4268229007720947, + "logits/rejected": -2.4031078815460205, + "logps/chosen": -478.80499267578125, + "logps/rejected": -481.1709899902344, + "loss": 0.5438, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.222773790359497, + "rewards/margins": 0.5444897413253784, + "rewards/rejected": -1.767263650894165, + "step": 4460 + }, + { + "epoch": 0.58, + "eval_logits/chosen": -2.359865188598633, + "eval_logits/rejected": -2.3748998641967773, + "eval_logps/chosen": -465.46929931640625, + "eval_logps/rejected": -473.4424743652344, + "eval_loss": 0.6078117489814758, + "eval_rewards/accuracies": 0.6660000085830688, + "eval_rewards/chosen": -1.3271992206573486, + "eval_rewards/margins": 0.39612552523612976, + "eval_rewards/rejected": -1.7233246564865112, + "eval_runtime": 197.046, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 4460 + }, + { + "epoch": 0.58, + "learning_rate": 2.195780133439794e-06, + "logits/chosen": -2.5647144317626953, + "logits/rejected": -2.566028118133545, + "logps/chosen": -478.5218200683594, + "logps/rejected": -513.4711303710938, + "loss": 0.6207, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.3200973272323608, + "rewards/margins": 0.40641552209854126, + "rewards/rejected": -1.7265126705169678, + "step": 4470 + }, + { + "epoch": 0.58, + "eval_logits/chosen": -2.3541791439056396, + "eval_logits/rejected": -2.3686718940734863, + "eval_logps/chosen": -473.7710266113281, + "eval_logps/rejected": -482.60931396484375, + "eval_loss": 0.608197033405304, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -1.410216212272644, + "eval_rewards/margins": 0.40477627515792847, + "eval_rewards/rejected": -1.8149923086166382, + "eval_runtime": 196.9358, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 4470 + }, + { + "epoch": 0.59, + "learning_rate": 2.1844459222932535e-06, + "logits/chosen": -2.5640816688537598, + "logits/rejected": -2.5077226161956787, + "logps/chosen": -475.5809631347656, + "logps/rejected": -474.53924560546875, + "loss": 0.5768, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.2696446180343628, + "rewards/margins": 0.47660988569259644, + "rewards/rejected": -1.7462546825408936, + "step": 4480 + }, + { + "epoch": 0.59, + "eval_logits/chosen": -2.3506596088409424, + "eval_logits/rejected": -2.364856004714966, + "eval_logps/chosen": -477.4462890625, + "eval_logps/rejected": -486.5351867675781, + "eval_loss": 0.6080268621444702, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -1.4469685554504395, + "eval_rewards/margins": 0.40728288888931274, + "eval_rewards/rejected": -1.8542513847351074, + "eval_runtime": 197.2231, + "eval_samples_per_second": 10.141, + "eval_steps_per_second": 5.07, + "step": 4480 + }, + { + "epoch": 0.59, + "learning_rate": 2.17311829834976e-06, + "logits/chosen": -2.5868237018585205, + "logits/rejected": -2.5791220664978027, + "logps/chosen": -462.0890197753906, + "logps/rejected": -485.8500061035156, + "loss": 0.583, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.2890268564224243, + "rewards/margins": 0.41418081521987915, + "rewards/rejected": -1.7032077312469482, + "step": 4490 + }, + { + "epoch": 0.59, + "eval_logits/chosen": -2.3469271659851074, + "eval_logits/rejected": -2.36118221282959, + "eval_logps/chosen": -479.05010986328125, + "eval_logps/rejected": -488.24432373046875, + "eval_loss": 0.6082322597503662, + "eval_rewards/accuracies": 0.6679999828338623, + "eval_rewards/chosen": -1.463006615638733, + "eval_rewards/margins": 0.40833622217178345, + "eval_rewards/rejected": -1.8713427782058716, + "eval_runtime": 197.0382, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 4490 + }, + { + "epoch": 0.59, + "learning_rate": 2.1617974980738814e-06, + "logits/chosen": -2.572697162628174, + "logits/rejected": -2.563896656036377, + "logps/chosen": -455.87713623046875, + "logps/rejected": -458.12823486328125, + "loss": 0.531, + "rewards/accuracies": 0.8374999761581421, + "rewards/chosen": -1.3974636793136597, + "rewards/margins": 0.5185772180557251, + "rewards/rejected": -1.9160410165786743, + "step": 4500 + }, + { + "epoch": 0.59, + "eval_logits/chosen": -2.3504679203033447, + "eval_logits/rejected": -2.3647711277008057, + "eval_logps/chosen": -475.88604736328125, + "eval_logps/rejected": -484.9482116699219, + "eval_loss": 0.6084606647491455, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -1.4313663244247437, + "eval_rewards/margins": 0.4070153832435608, + "eval_rewards/rejected": -1.8383818864822388, + "eval_runtime": 197.0121, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 4500 + }, + { + "epoch": 0.59, + "learning_rate": 2.150483757787744e-06, + "logits/chosen": -2.575751781463623, + "logits/rejected": -2.5314788818359375, + "logps/chosen": -459.76483154296875, + "logps/rejected": -441.937255859375, + "loss": 0.5774, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.4474533796310425, + "rewards/margins": 0.47381964325904846, + "rewards/rejected": -1.9212729930877686, + "step": 4510 + }, + { + "epoch": 0.59, + "eval_logits/chosen": -2.350123643875122, + "eval_logits/rejected": -2.364333391189575, + "eval_logps/chosen": -473.5544738769531, + "eval_logps/rejected": -482.2587890625, + "eval_loss": 0.6079715490341187, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -1.4080506563186646, + "eval_rewards/margins": 0.40343719720840454, + "eval_rewards/rejected": -1.8114880323410034, + "eval_runtime": 197.1065, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.073, + "step": 4510 + }, + { + "epoch": 0.59, + "learning_rate": 2.139177313666093e-06, + "logits/chosen": -2.509402275085449, + "logits/rejected": -2.524897336959839, + "logps/chosen": -487.89910888671875, + "logps/rejected": -468.90643310546875, + "loss": 0.5757, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.24689519405365, + "rewards/margins": 0.44425448775291443, + "rewards/rejected": -1.6911497116088867, + "step": 4520 + }, + { + "epoch": 0.59, + "eval_logits/chosen": -2.347217321395874, + "eval_logits/rejected": -2.3612282276153564, + "eval_logps/chosen": -472.8621826171875, + "eval_logps/rejected": -481.3768615722656, + "eval_loss": 0.6074300408363342, + "eval_rewards/accuracies": 0.6644999980926514, + "eval_rewards/chosen": -1.4011281728744507, + "eval_rewards/margins": 0.40154018998146057, + "eval_rewards/rejected": -1.8026682138442993, + "eval_runtime": 197.0079, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 4520 + }, + { + "epoch": 0.59, + "learning_rate": 2.1278784017313688e-06, + "logits/chosen": -2.5669217109680176, + "logits/rejected": -2.5706307888031006, + "logps/chosen": -495.63836669921875, + "logps/rejected": -530.7364501953125, + "loss": 0.6138, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.3805519342422485, + "rewards/margins": 0.36727243661880493, + "rewards/rejected": -1.7478240728378296, + "step": 4530 + }, + { + "epoch": 0.59, + "eval_logits/chosen": -2.3434321880340576, + "eval_logits/rejected": -2.3577535152435303, + "eval_logps/chosen": -471.2819519042969, + "eval_logps/rejected": -479.9246826171875, + "eval_loss": 0.6079375147819519, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -1.3853251934051514, + "eval_rewards/margins": 0.40282142162323, + "eval_rewards/rejected": -1.7881464958190918, + "eval_runtime": 197.0815, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 4530 + }, + { + "epoch": 0.59, + "learning_rate": 2.116587257848776e-06, + "logits/chosen": -2.5853219032287598, + "logits/rejected": -2.5926265716552734, + "logps/chosen": -447.34991455078125, + "logps/rejected": -500.7210998535156, + "loss": 0.6412, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.4180644750595093, + "rewards/margins": 0.32260221242904663, + "rewards/rejected": -1.7406667470932007, + "step": 4540 + }, + { + "epoch": 0.59, + "eval_logits/chosen": -2.3363149166107178, + "eval_logits/rejected": -2.3509626388549805, + "eval_logps/chosen": -471.30853271484375, + "eval_logps/rejected": -480.26007080078125, + "eval_loss": 0.6089949011802673, + "eval_rewards/accuracies": 0.6685000061988831, + "eval_rewards/chosen": -1.3855911493301392, + "eval_rewards/margins": 0.40590932965278625, + "eval_rewards/rejected": -1.7915005683898926, + "eval_runtime": 197.0543, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 4540 + }, + { + "epoch": 0.6, + "learning_rate": 2.105304117721361e-06, + "logits/chosen": -2.397624969482422, + "logits/rejected": -2.4318509101867676, + "logps/chosen": -404.4676208496094, + "logps/rejected": -399.29339599609375, + "loss": 0.6387, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -1.3216915130615234, + "rewards/margins": 0.3007916212081909, + "rewards/rejected": -1.6224830150604248, + "step": 4550 + }, + { + "epoch": 0.6, + "eval_logits/chosen": -2.3398046493530273, + "eval_logits/rejected": -2.354630470275879, + "eval_logps/chosen": -469.4595031738281, + "eval_logps/rejected": -478.1709899902344, + "eval_loss": 0.6085323095321655, + "eval_rewards/accuracies": 0.6700000166893005, + "eval_rewards/chosen": -1.3671008348464966, + "eval_rewards/margins": 0.4035090506076813, + "eval_rewards/rejected": -1.770609736442566, + "eval_runtime": 197.043, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 4550 + }, + { + "epoch": 0.6, + "learning_rate": 2.0940292168850913e-06, + "logits/chosen": -2.455711841583252, + "logits/rejected": -2.4487950801849365, + "logps/chosen": -457.455810546875, + "logps/rejected": -445.8837890625, + "loss": 0.6527, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.3930675983428955, + "rewards/margins": 0.2900000810623169, + "rewards/rejected": -1.6830676794052124, + "step": 4560 + }, + { + "epoch": 0.6, + "eval_logits/chosen": -2.3467464447021484, + "eval_logits/rejected": -2.3617465496063232, + "eval_logps/chosen": -465.6805419921875, + "eval_logps/rejected": -473.7642517089844, + "eval_loss": 0.6076022386550903, + "eval_rewards/accuracies": 0.6694999933242798, + "eval_rewards/chosen": -1.3293112516403198, + "eval_rewards/margins": 0.39723050594329834, + "eval_rewards/rejected": -1.7265417575836182, + "eval_runtime": 196.898, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 4560 + }, + { + "epoch": 0.6, + "learning_rate": 2.082762790703939e-06, + "logits/chosen": -2.5249645709991455, + "logits/rejected": -2.4668526649475098, + "logps/chosen": -469.11578369140625, + "logps/rejected": -475.1290588378906, + "loss": 0.6187, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.3228938579559326, + "rewards/margins": 0.34750640392303467, + "rewards/rejected": -1.6704002618789673, + "step": 4570 + }, + { + "epoch": 0.6, + "eval_logits/chosen": -2.355630874633789, + "eval_logits/rejected": -2.370851755142212, + "eval_logps/chosen": -459.7823181152344, + "eval_logps/rejected": -466.9198303222656, + "eval_loss": 0.6069644093513489, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -1.2703286409378052, + "eval_rewards/margins": 0.38776928186416626, + "eval_rewards/rejected": -1.6580978631973267, + "eval_runtime": 197.2739, + "eval_samples_per_second": 10.138, + "eval_steps_per_second": 5.069, + "step": 4570 + }, + { + "epoch": 0.6, + "learning_rate": 2.0715050743649674e-06, + "logits/chosen": -2.588480234146118, + "logits/rejected": -2.560148239135742, + "logps/chosen": -409.1583251953125, + "logps/rejected": -486.67620849609375, + "loss": 0.5671, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.1350984573364258, + "rewards/margins": 0.4589425027370453, + "rewards/rejected": -1.5940409898757935, + "step": 4580 + }, + { + "epoch": 0.6, + "eval_logits/chosen": -2.3605380058288574, + "eval_logits/rejected": -2.375964403152466, + "eval_logps/chosen": -456.28619384765625, + "eval_logps/rejected": -463.0014953613281, + "eval_loss": 0.6068898439407349, + "eval_rewards/accuracies": 0.6664999723434448, + "eval_rewards/chosen": -1.2353678941726685, + "eval_rewards/margins": 0.38354694843292236, + "eval_rewards/rejected": -1.6189148426055908, + "eval_runtime": 197.0899, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 4580 + }, + { + "epoch": 0.6, + "learning_rate": 2.060256302873421e-06, + "logits/chosen": -2.578284502029419, + "logits/rejected": -2.5939929485321045, + "logps/chosen": -418.2554626464844, + "logps/rejected": -480.61383056640625, + "loss": 0.5615, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.1238166093826294, + "rewards/margins": 0.49349433183670044, + "rewards/rejected": -1.617310881614685, + "step": 4590 + }, + { + "epoch": 0.6, + "eval_logits/chosen": -2.36183762550354, + "eval_logits/rejected": -2.377291440963745, + "eval_logps/chosen": -455.7953186035156, + "eval_logps/rejected": -462.3291931152344, + "eval_loss": 0.6073416471481323, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -1.2304589748382568, + "eval_rewards/margins": 0.38173264265060425, + "eval_rewards/rejected": -1.6121916770935059, + "eval_runtime": 197.0365, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 4590 + }, + { + "epoch": 0.6, + "learning_rate": 2.049016711047822e-06, + "logits/chosen": -2.6140739917755127, + "logits/rejected": -2.5730433464050293, + "logps/chosen": -448.59765625, + "logps/rejected": -468.0848693847656, + "loss": 0.5716, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.2782400846481323, + "rewards/margins": 0.44344210624694824, + "rewards/rejected": -1.7216823101043701, + "step": 4600 + }, + { + "epoch": 0.6, + "eval_logits/chosen": -2.358152151107788, + "eval_logits/rejected": -2.373021125793457, + "eval_logps/chosen": -460.1275634765625, + "eval_logps/rejected": -467.31146240234375, + "eval_loss": 0.6074530482292175, + "eval_rewards/accuracies": 0.6644999980926514, + "eval_rewards/chosen": -1.2737818956375122, + "eval_rewards/margins": 0.3882325291633606, + "eval_rewards/rejected": -1.6620142459869385, + "eval_runtime": 197.1299, + "eval_samples_per_second": 10.146, + "eval_steps_per_second": 5.073, + "step": 4600 + }, + { + "epoch": 0.6, + "learning_rate": 2.037786533515064e-06, + "logits/chosen": -2.63139009475708, + "logits/rejected": -2.6090714931488037, + "logps/chosen": -522.1685791015625, + "logps/rejected": -497.5794982910156, + "loss": 0.6994, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -1.4936577081680298, + "rewards/margins": 0.17892040312290192, + "rewards/rejected": -1.67257821559906, + "step": 4610 + }, + { + "epoch": 0.6, + "eval_logits/chosen": -2.3628578186035156, + "eval_logits/rejected": -2.3777432441711426, + "eval_logps/chosen": -457.7207946777344, + "eval_logps/rejected": -464.57574462890625, + "eval_loss": 0.6071527004241943, + "eval_rewards/accuracies": 0.6664999723434448, + "eval_rewards/chosen": -1.2497135400772095, + "eval_rewards/margins": 0.3849438726902008, + "eval_rewards/rejected": -1.6346575021743774, + "eval_runtime": 197.0828, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 4610 + }, + { + "epoch": 0.6, + "learning_rate": 2.02656600470552e-06, + "logits/chosen": -2.5862843990325928, + "logits/rejected": -2.595778703689575, + "logps/chosen": -451.0542907714844, + "logps/rejected": -471.8365173339844, + "loss": 0.5692, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.250226616859436, + "rewards/margins": 0.48750025033950806, + "rewards/rejected": -1.7377268075942993, + "step": 4620 + }, + { + "epoch": 0.6, + "eval_logits/chosen": -2.3665430545806885, + "eval_logits/rejected": -2.381023406982422, + "eval_logps/chosen": -457.5531921386719, + "eval_logps/rejected": -464.34783935546875, + "eval_loss": 0.6064249277114868, + "eval_rewards/accuracies": 0.6675000190734863, + "eval_rewards/chosen": -1.2480376958847046, + "eval_rewards/margins": 0.3843400478363037, + "eval_rewards/rejected": -1.6323778629302979, + "eval_runtime": 196.9761, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 4620 + }, + { + "epoch": 0.61, + "learning_rate": 2.015355358848144e-06, + "logits/chosen": -2.4676127433776855, + "logits/rejected": -2.5058672428131104, + "logps/chosen": -402.6142883300781, + "logps/rejected": -451.1036682128906, + "loss": 0.6417, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.2782353162765503, + "rewards/margins": 0.3019106388092041, + "rewards/rejected": -1.580146074295044, + "step": 4630 + }, + { + "epoch": 0.61, + "eval_logits/chosen": -2.363272190093994, + "eval_logits/rejected": -2.3775339126586914, + "eval_logps/chosen": -459.9270935058594, + "eval_logps/rejected": -467.0786437988281, + "eval_loss": 0.6064499616622925, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -1.2717769145965576, + "eval_rewards/margins": 0.3879096508026123, + "eval_rewards/rejected": -1.6596864461898804, + "eval_runtime": 197.3901, + "eval_samples_per_second": 10.132, + "eval_steps_per_second": 5.066, + "step": 4630 + }, + { + "epoch": 0.61, + "learning_rate": 2.004154829965582e-06, + "logits/chosen": -2.5863049030303955, + "logits/rejected": -2.5930287837982178, + "logps/chosen": -465.68524169921875, + "logps/rejected": -476.76873779296875, + "loss": 0.5776, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.19584321975708, + "rewards/margins": 0.40354451537132263, + "rewards/rejected": -1.599387764930725, + "step": 4640 + }, + { + "epoch": 0.61, + "eval_logits/chosen": -2.360830783843994, + "eval_logits/rejected": -2.3751513957977295, + "eval_logps/chosen": -460.028076171875, + "eval_logps/rejected": -467.1726989746094, + "eval_loss": 0.6065632104873657, + "eval_rewards/accuracies": 0.6679999828338623, + "eval_rewards/chosen": -1.272786259651184, + "eval_rewards/margins": 0.3878403902053833, + "eval_rewards/rejected": -1.6606266498565674, + "eval_runtime": 197.0543, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 4640 + }, + { + "epoch": 0.61, + "learning_rate": 1.99296465186929e-06, + "logits/chosen": -2.593928098678589, + "logits/rejected": -2.556190013885498, + "logps/chosen": -455.4571228027344, + "logps/rejected": -415.518798828125, + "loss": 0.5816, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.0729163885116577, + "rewards/margins": 0.3699313700199127, + "rewards/rejected": -1.442847728729248, + "step": 4650 + }, + { + "epoch": 0.61, + "eval_logits/chosen": -2.365307092666626, + "eval_logits/rejected": -2.380260944366455, + "eval_logps/chosen": -456.14019775390625, + "eval_logps/rejected": -462.9325256347656, + "eval_loss": 0.6066238880157471, + "eval_rewards/accuracies": 0.671500027179718, + "eval_rewards/chosen": -1.233907699584961, + "eval_rewards/margins": 0.3843171000480652, + "eval_rewards/rejected": -1.618224859237671, + "eval_runtime": 197.2054, + "eval_samples_per_second": 10.142, + "eval_steps_per_second": 5.071, + "step": 4650 + }, + { + "epoch": 0.61, + "learning_rate": 1.9817850581546488e-06, + "logits/chosen": -2.5619285106658936, + "logits/rejected": -2.5544750690460205, + "logps/chosen": -470.73931884765625, + "logps/rejected": -511.1991271972656, + "loss": 0.6182, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.1987391710281372, + "rewards/margins": 0.35175901651382446, + "rewards/rejected": -1.5504982471466064, + "step": 4660 + }, + { + "epoch": 0.61, + "eval_logits/chosen": -2.366844654083252, + "eval_logits/rejected": -2.381772518157959, + "eval_logps/chosen": -456.302490234375, + "eval_logps/rejected": -463.168701171875, + "eval_loss": 0.6066789031028748, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.235530972480774, + "eval_rewards/margins": 0.3850558400154114, + "eval_rewards/rejected": -1.6205867528915405, + "eval_runtime": 197.2261, + "eval_samples_per_second": 10.141, + "eval_steps_per_second": 5.07, + "step": 4660 + }, + { + "epoch": 0.61, + "learning_rate": 1.970616282196091e-06, + "logits/chosen": -2.5769898891448975, + "logits/rejected": -2.601787567138672, + "logps/chosen": -437.11962890625, + "logps/rejected": -451.64862060546875, + "loss": 0.6184, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -1.2210407257080078, + "rewards/margins": 0.3390752375125885, + "rewards/rejected": -1.5601160526275635, + "step": 4670 + }, + { + "epoch": 0.61, + "eval_logits/chosen": -2.368856906890869, + "eval_logits/rejected": -2.383789539337158, + "eval_logps/chosen": -454.7909851074219, + "eval_logps/rejected": -461.31109619140625, + "eval_loss": 0.6065412759780884, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.2204158306121826, + "eval_rewards/margins": 0.3815949261188507, + "eval_rewards/rejected": -1.602010726928711, + "eval_runtime": 197.1533, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 4670 + }, + { + "epoch": 0.61, + "learning_rate": 1.959458557142228e-06, + "logits/chosen": -2.617663860321045, + "logits/rejected": -2.5870256423950195, + "logps/chosen": -432.1153869628906, + "logps/rejected": -464.52020263671875, + "loss": 0.7167, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -1.2370940446853638, + "rewards/margins": 0.1650908887386322, + "rewards/rejected": -1.4021847248077393, + "step": 4680 + }, + { + "epoch": 0.61, + "eval_logits/chosen": -2.3691041469573975, + "eval_logits/rejected": -2.3844714164733887, + "eval_logps/chosen": -451.03857421875, + "eval_logps/rejected": -456.8002624511719, + "eval_loss": 0.6063486337661743, + "eval_rewards/accuracies": 0.671999990940094, + "eval_rewards/chosen": -1.1828911304473877, + "eval_rewards/margins": 0.37401124835014343, + "eval_rewards/rejected": -1.556902289390564, + "eval_runtime": 196.918, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.078, + "step": 4680 + }, + { + "epoch": 0.61, + "learning_rate": 1.948312115910982e-06, + "logits/chosen": -2.5269622802734375, + "logits/rejected": -2.5282649993896484, + "logps/chosen": -453.5530700683594, + "logps/rejected": -455.9603576660156, + "loss": 0.6275, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -1.1158868074417114, + "rewards/margins": 0.47353777289390564, + "rewards/rejected": -1.58942449092865, + "step": 4690 + }, + { + "epoch": 0.61, + "eval_logits/chosen": -2.37105393409729, + "eval_logits/rejected": -2.386112928390503, + "eval_logps/chosen": -449.9285583496094, + "eval_logps/rejected": -455.6019287109375, + "eval_loss": 0.6053135395050049, + "eval_rewards/accuracies": 0.6765000224113464, + "eval_rewards/chosen": -1.171791672706604, + "eval_rewards/margins": 0.3731272518634796, + "eval_rewards/rejected": -1.5449188947677612, + "eval_runtime": 197.1224, + "eval_samples_per_second": 10.146, + "eval_steps_per_second": 5.073, + "step": 4690 + }, + { + "epoch": 0.62, + "learning_rate": 1.937177191184729e-06, + "logits/chosen": -2.5588791370391846, + "logits/rejected": -2.5623703002929688, + "logps/chosen": -411.646484375, + "logps/rejected": -428.03515625, + "loss": 0.6771, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.1427921056747437, + "rewards/margins": 0.18787182867527008, + "rewards/rejected": -1.3306639194488525, + "step": 4700 + }, + { + "epoch": 0.62, + "eval_logits/chosen": -2.3747167587280273, + "eval_logits/rejected": -2.3900814056396484, + "eval_logps/chosen": -445.68310546875, + "eval_logps/rejected": -450.5074462890625, + "eval_loss": 0.6052196621894836, + "eval_rewards/accuracies": 0.671999990940094, + "eval_rewards/chosen": -1.129336953163147, + "eval_rewards/margins": 0.3646370768547058, + "eval_rewards/rejected": -1.493973970413208, + "eval_runtime": 196.884, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 4700 + }, + { + "epoch": 0.62, + "learning_rate": 1.9260540154054317e-06, + "logits/chosen": -2.599818229675293, + "logits/rejected": -2.5831518173217773, + "logps/chosen": -407.63092041015625, + "logps/rejected": -445.3501892089844, + "loss": 0.5374, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.0230557918548584, + "rewards/margins": 0.5404427647590637, + "rewards/rejected": -1.5634984970092773, + "step": 4710 + }, + { + "epoch": 0.62, + "eval_logits/chosen": -2.3762285709381104, + "eval_logits/rejected": -2.3915481567382812, + "eval_logps/chosen": -446.7705993652344, + "eval_logps/rejected": -451.7310791015625, + "eval_loss": 0.6049104928970337, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.140211820602417, + "eval_rewards/margins": 0.365998774766922, + "eval_rewards/rejected": -1.5062106847763062, + "eval_runtime": 196.9674, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 4710 + }, + { + "epoch": 0.62, + "learning_rate": 1.9149428207697983e-06, + "logits/chosen": -2.614574670791626, + "logits/rejected": -2.602724552154541, + "logps/chosen": -444.8438415527344, + "logps/rejected": -457.318603515625, + "loss": 0.6618, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.1555159091949463, + "rewards/margins": 0.2247290313243866, + "rewards/rejected": -1.3802449703216553, + "step": 4720 + }, + { + "epoch": 0.62, + "eval_logits/chosen": -2.376986026763916, + "eval_logits/rejected": -2.3926074504852295, + "eval_logps/chosen": -446.311767578125, + "eval_logps/rejected": -451.22210693359375, + "eval_loss": 0.6049630045890808, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.135623812675476, + "eval_rewards/margins": 0.3654967248439789, + "eval_rewards/rejected": -1.5011205673217773, + "eval_runtime": 197.1008, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.074, + "step": 4720 + }, + { + "epoch": 0.62, + "learning_rate": 1.9038438392244262e-06, + "logits/chosen": -2.5899956226348877, + "logits/rejected": -2.623196840286255, + "logps/chosen": -448.413330078125, + "logps/rejected": -460.62701416015625, + "loss": 0.5748, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.0493090152740479, + "rewards/margins": 0.40030479431152344, + "rewards/rejected": -1.4496138095855713, + "step": 4730 + }, + { + "epoch": 0.62, + "eval_logits/chosen": -2.367912769317627, + "eval_logits/rejected": -2.3833110332489014, + "eval_logps/chosen": -450.4692687988281, + "eval_logps/rejected": -455.7525634765625, + "eval_loss": 0.6047419309616089, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.177198886871338, + "eval_rewards/margins": 0.36922687292099, + "eval_rewards/rejected": -1.5464258193969727, + "eval_runtime": 196.8485, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 4730 + }, + { + "epoch": 0.62, + "learning_rate": 1.8927573024609666e-06, + "logits/chosen": -2.5434505939483643, + "logits/rejected": -2.5118329524993896, + "logps/chosen": -391.2030334472656, + "logps/rejected": -415.4365234375, + "loss": 0.5787, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.1048915386199951, + "rewards/margins": 0.4196711480617523, + "rewards/rejected": -1.5245627164840698, + "step": 4740 + }, + { + "epoch": 0.62, + "eval_logits/chosen": -2.356715202331543, + "eval_logits/rejected": -2.3718693256378174, + "eval_logps/chosen": -455.8304138183594, + "eval_logps/rejected": -461.651611328125, + "eval_loss": 0.6052024960517883, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.2308100461959839, + "eval_rewards/margins": 0.37460586428642273, + "eval_rewards/rejected": -1.605415940284729, + "eval_runtime": 196.9467, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.078, + "step": 4740 + }, + { + "epoch": 0.62, + "learning_rate": 1.8816834419112845e-06, + "logits/chosen": -2.5052685737609863, + "logits/rejected": -2.5242958068847656, + "logps/chosen": -430.42303466796875, + "logps/rejected": -435.1065368652344, + "loss": 0.5646, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.158060908317566, + "rewards/margins": 0.5619903802871704, + "rewards/rejected": -1.7200514078140259, + "step": 4750 + }, + { + "epoch": 0.62, + "eval_logits/chosen": -2.349745512008667, + "eval_logits/rejected": -2.364739179611206, + "eval_logps/chosen": -459.4644470214844, + "eval_logps/rejected": -465.70050048828125, + "eval_loss": 0.6054902076721191, + "eval_rewards/accuracies": 0.671500027179718, + "eval_rewards/chosen": -1.2671502828598022, + "eval_rewards/margins": 0.37875503301620483, + "eval_rewards/rejected": -1.6459051370620728, + "eval_runtime": 197.0325, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 4750 + }, + { + "epoch": 0.62, + "learning_rate": 1.8706224887426283e-06, + "logits/chosen": -2.541607141494751, + "logits/rejected": -2.5702714920043945, + "logps/chosen": -462.774658203125, + "logps/rejected": -490.91314697265625, + "loss": 0.6502, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.2841370105743408, + "rewards/margins": 0.2700539827346802, + "rewards/rejected": -1.554190993309021, + "step": 4760 + }, + { + "epoch": 0.62, + "eval_logits/chosen": -2.3464877605438232, + "eval_logits/rejected": -2.361438512802124, + "eval_logps/chosen": -459.6805114746094, + "eval_logps/rejected": -465.8286437988281, + "eval_loss": 0.60645592212677, + "eval_rewards/accuracies": 0.6685000061988831, + "eval_rewards/chosen": -1.2693109512329102, + "eval_rewards/margins": 0.37787550687789917, + "eval_rewards/rejected": -1.647186279296875, + "eval_runtime": 196.8861, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 4760 + }, + { + "epoch": 0.62, + "learning_rate": 1.8595746738528045e-06, + "logits/chosen": -2.5531961917877197, + "logits/rejected": -2.559727191925049, + "logps/chosen": -429.28912353515625, + "logps/rejected": -492.12554931640625, + "loss": 0.5963, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.1560680866241455, + "rewards/margins": 0.4180780351161957, + "rewards/rejected": -1.5741461515426636, + "step": 4770 + }, + { + "epoch": 0.62, + "eval_logits/chosen": -2.3467257022857666, + "eval_logits/rejected": -2.3617849349975586, + "eval_logps/chosen": -459.44964599609375, + "eval_logps/rejected": -465.6266174316406, + "eval_loss": 0.6069409847259521, + "eval_rewards/accuracies": 0.6704999804496765, + "eval_rewards/chosen": -1.2670023441314697, + "eval_rewards/margins": 0.3781636953353882, + "eval_rewards/rejected": -1.6451661586761475, + "eval_runtime": 196.8162, + "eval_samples_per_second": 10.162, + "eval_steps_per_second": 5.081, + "step": 4770 + }, + { + "epoch": 0.63, + "learning_rate": 1.8485402278653584e-06, + "logits/chosen": -2.547219753265381, + "logits/rejected": -2.548625946044922, + "logps/chosen": -431.35052490234375, + "logps/rejected": -445.52410888671875, + "loss": 0.5687, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.306722640991211, + "rewards/margins": 0.44679850339889526, + "rewards/rejected": -1.7535209655761719, + "step": 4780 + }, + { + "epoch": 0.63, + "eval_logits/chosen": -2.3422722816467285, + "eval_logits/rejected": -2.357463836669922, + "eval_logps/chosen": -461.4661560058594, + "eval_logps/rejected": -467.7752990722656, + "eval_loss": 0.608340322971344, + "eval_rewards/accuracies": 0.6660000085830688, + "eval_rewards/chosen": -1.2871674299240112, + "eval_rewards/margins": 0.37948548793792725, + "eval_rewards/rejected": -1.666652798652649, + "eval_runtime": 196.9486, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 4780 + }, + { + "epoch": 0.63, + "learning_rate": 1.8375193811247577e-06, + "logits/chosen": -2.454245090484619, + "logits/rejected": -2.420996904373169, + "logps/chosen": -437.4507751464844, + "logps/rejected": -432.07818603515625, + "loss": 0.644, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.3491953611373901, + "rewards/margins": 0.25019291043281555, + "rewards/rejected": -1.5993882417678833, + "step": 4790 + }, + { + "epoch": 0.63, + "eval_logits/chosen": -2.3409667015075684, + "eval_logits/rejected": -2.3561835289001465, + "eval_logps/chosen": -461.2978820800781, + "eval_logps/rejected": -467.45684814453125, + "eval_loss": 0.608421266078949, + "eval_rewards/accuracies": 0.6669999957084656, + "eval_rewards/chosen": -1.2854849100112915, + "eval_rewards/margins": 0.37798330187797546, + "eval_rewards/rejected": -1.6634680032730103, + "eval_runtime": 197.0826, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 4790 + }, + { + "epoch": 0.63, + "learning_rate": 1.826512363691586e-06, + "logits/chosen": -2.5934157371520996, + "logits/rejected": -2.5818896293640137, + "logps/chosen": -464.135986328125, + "logps/rejected": -465.83282470703125, + "loss": 0.6323, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.2174699306488037, + "rewards/margins": 0.3847096264362335, + "rewards/rejected": -1.6021795272827148, + "step": 4800 + }, + { + "epoch": 0.63, + "eval_logits/chosen": -2.336951732635498, + "eval_logits/rejected": -2.351977586746216, + "eval_logps/chosen": -461.8417053222656, + "eval_logps/rejected": -467.85968017578125, + "eval_loss": 0.608333170413971, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -1.2909232378005981, + "eval_rewards/margins": 0.37657347321510315, + "eval_rewards/rejected": -1.667496681213379, + "eval_runtime": 197.0144, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 4800 + }, + { + "epoch": 0.63, + "learning_rate": 1.8155194053377391e-06, + "logits/chosen": -2.559887647628784, + "logits/rejected": -2.5054869651794434, + "logps/chosen": -448.12042236328125, + "logps/rejected": -444.4010314941406, + "loss": 0.575, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.2008377313613892, + "rewards/margins": 0.487928569316864, + "rewards/rejected": -1.6887662410736084, + "step": 4810 + }, + { + "epoch": 0.63, + "eval_logits/chosen": -2.336354970932007, + "eval_logits/rejected": -2.3510005474090576, + "eval_logps/chosen": -464.4339599609375, + "eval_logps/rejected": -471.0044860839844, + "eval_loss": 0.607102632522583, + "eval_rewards/accuracies": 0.6675000190734863, + "eval_rewards/chosen": -1.3168458938598633, + "eval_rewards/margins": 0.3820990025997162, + "eval_rewards/rejected": -1.6989449262619019, + "eval_runtime": 197.1694, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 4810 + }, + { + "epoch": 0.63, + "learning_rate": 1.80454073554163e-06, + "logits/chosen": -2.514131784439087, + "logits/rejected": -2.4912569522857666, + "logps/chosen": -406.12646484375, + "logps/rejected": -405.43072509765625, + "loss": 0.645, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.2258880138397217, + "rewards/margins": 0.30174189805984497, + "rewards/rejected": -1.527630090713501, + "step": 4820 + }, + { + "epoch": 0.63, + "eval_logits/chosen": -2.3418140411376953, + "eval_logits/rejected": -2.356260299682617, + "eval_logps/chosen": -465.5768127441406, + "eval_logps/rejected": -472.4665222167969, + "eval_loss": 0.6061503291130066, + "eval_rewards/accuracies": 0.6690000295639038, + "eval_rewards/chosen": -1.3282736539840698, + "eval_rewards/margins": 0.3852910101413727, + "eval_rewards/rejected": -1.7135647535324097, + "eval_runtime": 197.061, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 4820 + }, + { + "epoch": 0.63, + "learning_rate": 1.7935765834833966e-06, + "logits/chosen": -2.5507476329803467, + "logits/rejected": -2.5161209106445312, + "logps/chosen": -430.421142578125, + "logps/rejected": -504.8538513183594, + "loss": 0.5161, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.164041519165039, + "rewards/margins": 0.6415061354637146, + "rewards/rejected": -1.8055477142333984, + "step": 4830 + }, + { + "epoch": 0.63, + "eval_logits/chosen": -2.3437082767486572, + "eval_logits/rejected": -2.357666015625, + "eval_logps/chosen": -467.56988525390625, + "eval_logps/rejected": -474.6637268066406, + "eval_loss": 0.6059348583221436, + "eval_rewards/accuracies": 0.6685000061988831, + "eval_rewards/chosen": -1.3482048511505127, + "eval_rewards/margins": 0.38733214139938354, + "eval_rewards/rejected": -1.735536813735962, + "eval_runtime": 196.9656, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 4830 + }, + { + "epoch": 0.63, + "learning_rate": 1.7826271780401182e-06, + "logits/chosen": -2.353175640106201, + "logits/rejected": -2.387111186981201, + "logps/chosen": -439.74078369140625, + "logps/rejected": -449.5003967285156, + "loss": 0.5804, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.3335378170013428, + "rewards/margins": 0.39224615693092346, + "rewards/rejected": -1.7257843017578125, + "step": 4840 + }, + { + "epoch": 0.63, + "eval_logits/chosen": -2.3427133560180664, + "eval_logits/rejected": -2.3566486835479736, + "eval_logps/chosen": -469.28875732421875, + "eval_logps/rejected": -476.5825500488281, + "eval_loss": 0.6061907410621643, + "eval_rewards/accuracies": 0.6685000061988831, + "eval_rewards/chosen": -1.3653934001922607, + "eval_rewards/margins": 0.3893316686153412, + "eval_rewards/rejected": -1.7547252178192139, + "eval_runtime": 197.0425, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 4840 + }, + { + "epoch": 0.63, + "learning_rate": 1.7716927477810389e-06, + "logits/chosen": -2.543253183364868, + "logits/rejected": -2.5667479038238525, + "logps/chosen": -467.21527099609375, + "logps/rejected": -513.29248046875, + "loss": 0.571, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.4500186443328857, + "rewards/margins": 0.5679537057876587, + "rewards/rejected": -2.017972230911255, + "step": 4850 + }, + { + "epoch": 0.63, + "eval_logits/chosen": -2.3366286754608154, + "eval_logits/rejected": -2.350470542907715, + "eval_logps/chosen": -472.86968994140625, + "eval_logps/rejected": -480.5473937988281, + "eval_loss": 0.6069199442863464, + "eval_rewards/accuracies": 0.6704999804496765, + "eval_rewards/chosen": -1.401202917098999, + "eval_rewards/margins": 0.39317089319229126, + "eval_rewards/rejected": -1.7943737506866455, + "eval_runtime": 197.3065, + "eval_samples_per_second": 10.137, + "eval_steps_per_second": 5.068, + "step": 4850 + }, + { + "epoch": 0.64, + "learning_rate": 1.7607735209627953e-06, + "logits/chosen": -2.544330596923828, + "logits/rejected": -2.4630868434906006, + "logps/chosen": -475.41070556640625, + "logps/rejected": -475.90728759765625, + "loss": 0.5553, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.4359493255615234, + "rewards/margins": 0.5188180208206177, + "rewards/rejected": -1.9547672271728516, + "step": 4860 + }, + { + "epoch": 0.64, + "eval_logits/chosen": -2.330714702606201, + "eval_logits/rejected": -2.344393491744995, + "eval_logps/chosen": -475.8780517578125, + "eval_logps/rejected": -483.9844055175781, + "eval_loss": 0.6073537468910217, + "eval_rewards/accuracies": 0.6704999804496765, + "eval_rewards/chosen": -1.4312864542007446, + "eval_rewards/margins": 0.39745715260505676, + "eval_rewards/rejected": -1.8287436962127686, + "eval_runtime": 197.1786, + "eval_samples_per_second": 10.143, + "eval_steps_per_second": 5.072, + "step": 4860 + }, + { + "epoch": 0.64, + "learning_rate": 1.749869725524651e-06, + "logits/chosen": -2.556461811065674, + "logits/rejected": -2.519881010055542, + "logps/chosen": -482.1178283691406, + "logps/rejected": -488.8779296875, + "loss": 0.5774, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.4461584091186523, + "rewards/margins": 0.4897529184818268, + "rewards/rejected": -1.9359114170074463, + "step": 4870 + }, + { + "epoch": 0.64, + "eval_logits/chosen": -2.3231258392333984, + "eval_logits/rejected": -2.3361117839813232, + "eval_logps/chosen": -482.7806396484375, + "eval_logps/rejected": -491.5416259765625, + "eval_loss": 0.6079848408699036, + "eval_rewards/accuracies": 0.6700000166893005, + "eval_rewards/chosen": -1.5003119707107544, + "eval_rewards/margins": 0.4040038287639618, + "eval_rewards/rejected": -1.9043160676956177, + "eval_runtime": 197.076, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 4870 + }, + { + "epoch": 0.64, + "learning_rate": 1.7389815890837392e-06, + "logits/chosen": -2.466991901397705, + "logits/rejected": -2.4719462394714355, + "logps/chosen": -476.388427734375, + "logps/rejected": -549.646240234375, + "loss": 0.5515, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.4125124216079712, + "rewards/margins": 0.5354470014572144, + "rewards/rejected": -1.947959303855896, + "step": 4880 + }, + { + "epoch": 0.64, + "eval_logits/chosen": -2.3077244758605957, + "eval_logits/rejected": -2.3208236694335938, + "eval_logps/chosen": -487.87640380859375, + "eval_logps/rejected": -497.2055358886719, + "eval_loss": 0.6097118258476257, + "eval_rewards/accuracies": 0.6754999756813049, + "eval_rewards/chosen": -1.5512698888778687, + "eval_rewards/margins": 0.4096851646900177, + "eval_rewards/rejected": -1.960955023765564, + "eval_runtime": 197.1263, + "eval_samples_per_second": 10.146, + "eval_steps_per_second": 5.073, + "step": 4880 + }, + { + "epoch": 0.64, + "learning_rate": 1.7281093389303105e-06, + "logits/chosen": -2.5559370517730713, + "logits/rejected": -2.5300230979919434, + "logps/chosen": -454.37158203125, + "logps/rejected": -464.384521484375, + "loss": 0.6337, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.4563044309616089, + "rewards/margins": 0.377260684967041, + "rewards/rejected": -1.83356511592865, + "step": 4890 + }, + { + "epoch": 0.64, + "eval_logits/chosen": -2.3070895671844482, + "eval_logits/rejected": -2.320559501647949, + "eval_logps/chosen": -485.8458251953125, + "eval_logps/rejected": -494.9561767578125, + "eval_loss": 0.60999995470047, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.5309646129608154, + "eval_rewards/margins": 0.40749725699424744, + "eval_rewards/rejected": -1.9384618997573853, + "eval_runtime": 197.1688, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 4890 + }, + { + "epoch": 0.64, + "learning_rate": 1.7172532020229899e-06, + "logits/chosen": -2.526170253753662, + "logits/rejected": -2.5139780044555664, + "logps/chosen": -498.65167236328125, + "logps/rejected": -504.8377990722656, + "loss": 0.5866, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.522796869277954, + "rewards/margins": 0.4846018850803375, + "rewards/rejected": -2.0073986053466797, + "step": 4900 + }, + { + "epoch": 0.64, + "eval_logits/chosen": -2.3101584911346436, + "eval_logits/rejected": -2.3237569332122803, + "eval_logps/chosen": -482.6979675292969, + "eval_logps/rejected": -491.50799560546875, + "eval_loss": 0.6095430850982666, + "eval_rewards/accuracies": 0.6769999861717224, + "eval_rewards/chosen": -1.499485969543457, + "eval_rewards/margins": 0.4044934809207916, + "eval_rewards/rejected": -1.9039794206619263, + "eval_runtime": 197.2086, + "eval_samples_per_second": 10.142, + "eval_steps_per_second": 5.071, + "step": 4900 + }, + { + "epoch": 0.64, + "learning_rate": 1.7064134049840359e-06, + "logits/chosen": -2.507721185684204, + "logits/rejected": -2.546586275100708, + "logps/chosen": -463.30078125, + "logps/rejected": -505.97833251953125, + "loss": 0.5647, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.4573460817337036, + "rewards/margins": 0.46546226739883423, + "rewards/rejected": -1.922808289527893, + "step": 4910 + }, + { + "epoch": 0.64, + "eval_logits/chosen": -2.3065459728240967, + "eval_logits/rejected": -2.3202407360076904, + "eval_logps/chosen": -483.61065673828125, + "eval_logps/rejected": -492.63818359375, + "eval_loss": 0.6099902391433716, + "eval_rewards/accuracies": 0.675000011920929, + "eval_rewards/chosen": -1.5086122751235962, + "eval_rewards/margins": 0.4066696763038635, + "eval_rewards/rejected": -1.9152820110321045, + "eval_runtime": 196.7095, + "eval_samples_per_second": 10.167, + "eval_steps_per_second": 5.084, + "step": 4910 + }, + { + "epoch": 0.64, + "learning_rate": 1.6955901740946136e-06, + "logits/chosen": -2.532555341720581, + "logits/rejected": -2.5162951946258545, + "logps/chosen": -534.9705200195312, + "logps/rejected": -571.7120361328125, + "loss": 0.5853, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.7092939615249634, + "rewards/margins": 0.5048640370368958, + "rewards/rejected": -2.214157819747925, + "step": 4920 + }, + { + "epoch": 0.64, + "eval_logits/chosen": -2.2999629974365234, + "eval_logits/rejected": -2.3135812282562256, + "eval_logps/chosen": -484.7542724609375, + "eval_logps/rejected": -493.9432373046875, + "eval_loss": 0.6107072830200195, + "eval_rewards/accuracies": 0.6710000038146973, + "eval_rewards/chosen": -1.5200488567352295, + "eval_rewards/margins": 0.40828338265419006, + "eval_rewards/rejected": -1.9283322095870972, + "eval_runtime": 196.8667, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.08, + "step": 4920 + }, + { + "epoch": 0.65, + "learning_rate": 1.684783735290067e-06, + "logits/chosen": -2.452775001525879, + "logits/rejected": -2.436053514480591, + "logps/chosen": -464.0335388183594, + "logps/rejected": -503.10565185546875, + "loss": 0.5357, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.4800546169281006, + "rewards/margins": 0.6100779175758362, + "rewards/rejected": -2.090132713317871, + "step": 4930 + }, + { + "epoch": 0.65, + "eval_logits/chosen": -2.291494607925415, + "eval_logits/rejected": -2.304412364959717, + "eval_logps/chosen": -488.9641418457031, + "eval_logps/rejected": -498.66180419921875, + "eval_loss": 0.6109405755996704, + "eval_rewards/accuracies": 0.6710000038146973, + "eval_rewards/chosen": -1.5621472597122192, + "eval_rewards/margins": 0.413370817899704, + "eval_rewards/rejected": -1.975517988204956, + "eval_runtime": 197.0966, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.074, + "step": 4930 + }, + { + "epoch": 0.65, + "learning_rate": 1.6739943141552079e-06, + "logits/chosen": -2.4729270935058594, + "logits/rejected": -2.4224693775177, + "logps/chosen": -514.30078125, + "logps/rejected": -504.29193115234375, + "loss": 0.5771, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.4810277223587036, + "rewards/margins": 0.5367648005485535, + "rewards/rejected": -2.017792224884033, + "step": 4940 + }, + { + "epoch": 0.65, + "eval_logits/chosen": -2.2908596992492676, + "eval_logits/rejected": -2.3033571243286133, + "eval_logps/chosen": -489.99090576171875, + "eval_logps/rejected": -499.9726867675781, + "eval_loss": 0.6103520393371582, + "eval_rewards/accuracies": 0.6704999804496765, + "eval_rewards/chosen": -1.5724151134490967, + "eval_rewards/margins": 0.41621133685112, + "eval_rewards/rejected": -1.9886267185211182, + "eval_runtime": 197.1115, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.073, + "step": 4940 + }, + { + "epoch": 0.65, + "learning_rate": 1.663222135919601e-06, + "logits/chosen": -2.5372846126556396, + "logits/rejected": -2.495419979095459, + "logps/chosen": -520.7520141601562, + "logps/rejected": -525.1770629882812, + "loss": 0.6244, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.5612698793411255, + "rewards/margins": 0.35862964391708374, + "rewards/rejected": -1.919899582862854, + "step": 4950 + }, + { + "epoch": 0.65, + "eval_logits/chosen": -2.2972042560577393, + "eval_logits/rejected": -2.3097643852233887, + "eval_logps/chosen": -483.6584777832031, + "eval_logps/rejected": -493.1936340332031, + "eval_loss": 0.6088432669639587, + "eval_rewards/accuracies": 0.6690000295639038, + "eval_rewards/chosen": -1.509090542793274, + "eval_rewards/margins": 0.41174548864364624, + "eval_rewards/rejected": -1.9208359718322754, + "eval_runtime": 196.7904, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 5.082, + "step": 4950 + }, + { + "epoch": 0.65, + "learning_rate": 1.652467425452865e-06, + "logits/chosen": -2.539245128631592, + "logits/rejected": -2.5281739234924316, + "logps/chosen": -452.2598571777344, + "logps/rejected": -458.35791015625, + "loss": 0.6303, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.4337660074234009, + "rewards/margins": 0.30899950861930847, + "rewards/rejected": -1.7427654266357422, + "step": 4960 + }, + { + "epoch": 0.65, + "eval_logits/chosen": -2.304708957672119, + "eval_logits/rejected": -2.317460775375366, + "eval_logps/chosen": -478.4639892578125, + "eval_logps/rejected": -487.269775390625, + "eval_loss": 0.6076183319091797, + "eval_rewards/accuracies": 0.6675000190734863, + "eval_rewards/chosen": -1.4571460485458374, + "eval_rewards/margins": 0.40445175766944885, + "eval_rewards/rejected": -1.8615976572036743, + "eval_runtime": 197.0151, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 4960 + }, + { + "epoch": 0.65, + "learning_rate": 1.6417304072599787e-06, + "logits/chosen": -2.5274784564971924, + "logits/rejected": -2.4446208477020264, + "logps/chosen": -478.2151794433594, + "logps/rejected": -510.80938720703125, + "loss": 0.6038, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.5238378047943115, + "rewards/margins": 0.408879816532135, + "rewards/rejected": -1.9327175617218018, + "step": 4970 + }, + { + "epoch": 0.65, + "eval_logits/chosen": -2.3127100467681885, + "eval_logits/rejected": -2.3254218101501465, + "eval_logps/chosen": -474.1257629394531, + "eval_logps/rejected": -482.4593200683594, + "eval_loss": 0.6065331101417542, + "eval_rewards/accuracies": 0.6664999723434448, + "eval_rewards/chosen": -1.413763165473938, + "eval_rewards/margins": 0.3997298777103424, + "eval_rewards/rejected": -1.8134931325912476, + "eval_runtime": 196.7927, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 5.081, + "step": 4970 + }, + { + "epoch": 0.65, + "learning_rate": 1.6310113054765947e-06, + "logits/chosen": -2.5427753925323486, + "logits/rejected": -2.5154194831848145, + "logps/chosen": -491.2950744628906, + "logps/rejected": -490.4586486816406, + "loss": 0.5813, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.4039534330368042, + "rewards/margins": 0.549685001373291, + "rewards/rejected": -1.9536384344100952, + "step": 4980 + }, + { + "epoch": 0.65, + "eval_logits/chosen": -2.317664623260498, + "eval_logits/rejected": -2.3301045894622803, + "eval_logps/chosen": -473.3541259765625, + "eval_logps/rejected": -481.7451477050781, + "eval_loss": 0.6059139370918274, + "eval_rewards/accuracies": 0.6669999957084656, + "eval_rewards/chosen": -1.406046748161316, + "eval_rewards/margins": 0.4003046751022339, + "eval_rewards/rejected": -1.8063515424728394, + "eval_runtime": 196.6922, + "eval_samples_per_second": 10.168, + "eval_steps_per_second": 5.084, + "step": 4980 + }, + { + "epoch": 0.65, + "learning_rate": 1.6203103438643591e-06, + "logits/chosen": -2.5425033569335938, + "logits/rejected": -2.545300245285034, + "logps/chosen": -458.9690856933594, + "logps/rejected": -481.59637451171875, + "loss": 0.6386, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -1.4391909837722778, + "rewards/margins": 0.31656602025032043, + "rewards/rejected": -1.7557569742202759, + "step": 4990 + }, + { + "epoch": 0.65, + "eval_logits/chosen": -2.317337989807129, + "eval_logits/rejected": -2.3296010494232178, + "eval_logps/chosen": -473.6238098144531, + "eval_logps/rejected": -482.0346984863281, + "eval_loss": 0.6056146025657654, + "eval_rewards/accuracies": 0.6690000295639038, + "eval_rewards/chosen": -1.408744215965271, + "eval_rewards/margins": 0.4005022644996643, + "eval_rewards/rejected": -1.809246301651001, + "eval_runtime": 196.9842, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 4990 + }, + { + "epoch": 0.65, + "learning_rate": 1.6096277458062417e-06, + "logits/chosen": -2.5096194744110107, + "logits/rejected": -2.506507635116577, + "logps/chosen": -388.854736328125, + "logps/rejected": -456.4751892089844, + "loss": 0.5541, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.3161919116973877, + "rewards/margins": 0.5099955797195435, + "rewards/rejected": -1.8261874914169312, + "step": 5000 + }, + { + "epoch": 0.65, + "eval_logits/chosen": -2.3156914710998535, + "eval_logits/rejected": -2.3281033039093018, + "eval_logps/chosen": -473.21319580078125, + "eval_logps/rejected": -481.538330078125, + "eval_loss": 0.606336772441864, + "eval_rewards/accuracies": 0.6679999828338623, + "eval_rewards/chosen": -1.4046378135681152, + "eval_rewards/margins": 0.3996453285217285, + "eval_rewards/rejected": -1.8042830228805542, + "eval_runtime": 196.9579, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 5000 + }, + { + "epoch": 0.66, + "learning_rate": 1.5989637343018705e-06, + "logits/chosen": -2.4774773120880127, + "logits/rejected": -2.451045274734497, + "logps/chosen": -432.1453552246094, + "logps/rejected": -484.3370056152344, + "loss": 0.5711, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.1870195865631104, + "rewards/margins": 0.47284239530563354, + "rewards/rejected": -1.6598621606826782, + "step": 5010 + }, + { + "epoch": 0.66, + "eval_logits/chosen": -2.3094582557678223, + "eval_logits/rejected": -2.3220887184143066, + "eval_logps/chosen": -474.7253723144531, + "eval_logps/rejected": -483.3790588378906, + "eval_loss": 0.6073668003082275, + "eval_rewards/accuracies": 0.6625000238418579, + "eval_rewards/chosen": -1.4197593927383423, + "eval_rewards/margins": 0.40293073654174805, + "eval_rewards/rejected": -1.8226900100708008, + "eval_runtime": 197.0656, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.074, + "step": 5010 + }, + { + "epoch": 0.66, + "learning_rate": 1.5883185319628824e-06, + "logits/chosen": -2.4050259590148926, + "logits/rejected": -2.366429567337036, + "logps/chosen": -499.8345642089844, + "logps/rejected": -475.7578125, + "loss": 0.581, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.4453026056289673, + "rewards/margins": 0.42148298025131226, + "rewards/rejected": -1.8667854070663452, + "step": 5020 + }, + { + "epoch": 0.66, + "eval_logits/chosen": -2.312451124191284, + "eval_logits/rejected": -2.3250417709350586, + "eval_logps/chosen": -475.1689147949219, + "eval_logps/rejected": -483.9620666503906, + "eval_loss": 0.6066410541534424, + "eval_rewards/accuracies": 0.6669999957084656, + "eval_rewards/chosen": -1.4241948127746582, + "eval_rewards/margins": 0.4043256342411041, + "eval_rewards/rejected": -1.8285205364227295, + "eval_runtime": 196.8009, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 5.081, + "step": 5020 + }, + { + "epoch": 0.66, + "learning_rate": 1.5776923610082695e-06, + "logits/chosen": -2.58607816696167, + "logits/rejected": -2.5599188804626465, + "logps/chosen": -451.46417236328125, + "logps/rejected": -478.35955810546875, + "loss": 0.5567, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.3380658626556396, + "rewards/margins": 0.5778164267539978, + "rewards/rejected": -1.9158824682235718, + "step": 5030 + }, + { + "epoch": 0.66, + "eval_logits/chosen": -2.313192844390869, + "eval_logits/rejected": -2.325887441635132, + "eval_logps/chosen": -472.5576477050781, + "eval_logps/rejected": -480.987060546875, + "eval_loss": 0.6061822772026062, + "eval_rewards/accuracies": 0.6685000061988831, + "eval_rewards/chosen": -1.3980821371078491, + "eval_rewards/margins": 0.40068814158439636, + "eval_rewards/rejected": -1.7987704277038574, + "eval_runtime": 196.8955, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 5030 + }, + { + "epoch": 0.66, + "learning_rate": 1.5670854432597433e-06, + "logits/chosen": -2.4839038848876953, + "logits/rejected": -2.4908900260925293, + "logps/chosen": -514.7501220703125, + "logps/rejected": -471.20782470703125, + "loss": 0.6432, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.435762643814087, + "rewards/margins": 0.2490122765302658, + "rewards/rejected": -1.6847747564315796, + "step": 5040 + }, + { + "epoch": 0.66, + "eval_logits/chosen": -2.3156557083129883, + "eval_logits/rejected": -2.3286592960357666, + "eval_logps/chosen": -468.1942443847656, + "eval_logps/rejected": -475.9248046875, + "eval_loss": 0.6063724160194397, + "eval_rewards/accuracies": 0.6660000085830688, + "eval_rewards/chosen": -1.3544481992721558, + "eval_rewards/margins": 0.39369943737983704, + "eval_rewards/rejected": -1.7481478452682495, + "eval_runtime": 196.7985, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 5.081, + "step": 5040 + }, + { + "epoch": 0.66, + "learning_rate": 1.556498000137104e-06, + "logits/chosen": -2.40048885345459, + "logits/rejected": -2.391714572906494, + "logps/chosen": -435.9031677246094, + "logps/rejected": -444.23785400390625, + "loss": 0.5867, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.3554993867874146, + "rewards/margins": 0.4399290084838867, + "rewards/rejected": -1.7954285144805908, + "step": 5050 + }, + { + "epoch": 0.66, + "eval_logits/chosen": -2.315324068069458, + "eval_logits/rejected": -2.3286914825439453, + "eval_logps/chosen": -465.41534423828125, + "eval_logps/rejected": -472.6462707519531, + "eval_loss": 0.6065265536308289, + "eval_rewards/accuracies": 0.6660000085830688, + "eval_rewards/chosen": -1.3266593217849731, + "eval_rewards/margins": 0.3887033462524414, + "eval_rewards/rejected": -1.715362787246704, + "eval_runtime": 197.0067, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 5050 + }, + { + "epoch": 0.66, + "learning_rate": 1.5459302526536188e-06, + "logits/chosen": -2.496645450592041, + "logits/rejected": -2.4642739295959473, + "logps/chosen": -450.39715576171875, + "logps/rejected": -466.58416748046875, + "loss": 0.6339, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.3087141513824463, + "rewards/margins": 0.37976545095443726, + "rewards/rejected": -1.6884794235229492, + "step": 5060 + }, + { + "epoch": 0.66, + "eval_logits/chosen": -2.3121721744537354, + "eval_logits/rejected": -2.325657844543457, + "eval_logps/chosen": -464.2017822265625, + "eval_logps/rejected": -471.1905822753906, + "eval_loss": 0.6065158843994141, + "eval_rewards/accuracies": 0.6675000190734863, + "eval_rewards/chosen": -1.3145238161087036, + "eval_rewards/margins": 0.3862822651863098, + "eval_rewards/rejected": -1.7008060216903687, + "eval_runtime": 196.9539, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 5060 + }, + { + "epoch": 0.66, + "learning_rate": 1.5353824214114075e-06, + "logits/chosen": -2.6206235885620117, + "logits/rejected": -2.6003384590148926, + "logps/chosen": -457.868896484375, + "logps/rejected": -479.43768310546875, + "loss": 0.5926, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.3027828931808472, + "rewards/margins": 0.39749962091445923, + "rewards/rejected": -1.7002826929092407, + "step": 5070 + }, + { + "epoch": 0.66, + "eval_logits/chosen": -2.3142552375793457, + "eval_logits/rejected": -2.3274495601654053, + "eval_logps/chosen": -464.9194030761719, + "eval_logps/rejected": -471.9013977050781, + "eval_loss": 0.6058085560798645, + "eval_rewards/accuracies": 0.6664999723434448, + "eval_rewards/chosen": -1.321699857711792, + "eval_rewards/margins": 0.386214017868042, + "eval_rewards/rejected": -1.7079139947891235, + "eval_runtime": 197.0686, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.074, + "step": 5070 + }, + { + "epoch": 0.66, + "learning_rate": 1.5248547265968373e-06, + "logits/chosen": -2.583876371383667, + "logits/rejected": -2.569124698638916, + "logps/chosen": -426.1070861816406, + "logps/rejected": -461.442626953125, + "loss": 0.5513, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.2332156896591187, + "rewards/margins": 0.5031381845474243, + "rewards/rejected": -1.7363536357879639, + "step": 5080 + }, + { + "epoch": 0.66, + "eval_logits/chosen": -2.3098928928375244, + "eval_logits/rejected": -2.3230464458465576, + "eval_logps/chosen": -466.5625305175781, + "eval_logps/rejected": -473.8684997558594, + "eval_loss": 0.6062521934509277, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -1.338131070137024, + "eval_rewards/margins": 0.3894534111022949, + "eval_rewards/rejected": -1.7275844812393188, + "eval_runtime": 196.9038, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 5080 + }, + { + "epoch": 0.67, + "learning_rate": 1.5143473879759265e-06, + "logits/chosen": -2.5847601890563965, + "logits/rejected": -2.500302791595459, + "logps/chosen": -431.3771057128906, + "logps/rejected": -438.20379638671875, + "loss": 0.5409, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.2843835353851318, + "rewards/margins": 0.6454133987426758, + "rewards/rejected": -1.929796814918518, + "step": 5090 + }, + { + "epoch": 0.67, + "eval_logits/chosen": -2.3079354763031006, + "eval_logits/rejected": -2.320760488510132, + "eval_logps/chosen": -468.5992431640625, + "eval_logps/rejected": -476.30364990234375, + "eval_loss": 0.6059185266494751, + "eval_rewards/accuracies": 0.6669999957084656, + "eval_rewards/chosen": -1.358498454093933, + "eval_rewards/margins": 0.3934376835823059, + "eval_rewards/rejected": -1.7519360780715942, + "eval_runtime": 197.2696, + "eval_samples_per_second": 10.138, + "eval_steps_per_second": 5.069, + "step": 5090 + }, + { + "epoch": 0.67, + "learning_rate": 1.5038606248897586e-06, + "logits/chosen": -2.519559144973755, + "logits/rejected": -2.530374050140381, + "logps/chosen": -504.1170959472656, + "logps/rejected": -497.4813537597656, + "loss": 0.6739, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.510613203048706, + "rewards/margins": 0.20000800490379333, + "rewards/rejected": -1.7106212377548218, + "step": 5100 + }, + { + "epoch": 0.67, + "eval_logits/chosen": -2.3100926876068115, + "eval_logits/rejected": -2.3227438926696777, + "eval_logps/chosen": -469.88934326171875, + "eval_logps/rejected": -477.7745666503906, + "eval_loss": 0.6048146486282349, + "eval_rewards/accuracies": 0.6690000295639038, + "eval_rewards/chosen": -1.3713992834091187, + "eval_rewards/margins": 0.3952457904815674, + "eval_rewards/rejected": -1.7666451930999756, + "eval_runtime": 197.3203, + "eval_samples_per_second": 10.136, + "eval_steps_per_second": 5.068, + "step": 5100 + }, + { + "epoch": 0.67, + "learning_rate": 1.4933946562499008e-06, + "logits/chosen": -2.4187283515930176, + "logits/rejected": -2.424403667449951, + "logps/chosen": -458.12347412109375, + "logps/rejected": -449.3030700683594, + "loss": 0.625, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.3385467529296875, + "rewards/margins": 0.34443196654319763, + "rewards/rejected": -1.682978868484497, + "step": 5110 + }, + { + "epoch": 0.67, + "eval_logits/chosen": -2.309351921081543, + "eval_logits/rejected": -2.3218774795532227, + "eval_logps/chosen": -469.4795227050781, + "eval_logps/rejected": -477.3433837890625, + "eval_loss": 0.6042229533195496, + "eval_rewards/accuracies": 0.6704999804496765, + "eval_rewards/chosen": -1.3673009872436523, + "eval_rewards/margins": 0.39503201842308044, + "eval_rewards/rejected": -1.7623330354690552, + "eval_runtime": 196.9639, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 5110 + }, + { + "epoch": 0.67, + "learning_rate": 1.482949700533835e-06, + "logits/chosen": -2.388120174407959, + "logits/rejected": -2.3988916873931885, + "logps/chosen": -408.99066162109375, + "logps/rejected": -426.799560546875, + "loss": 0.5985, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.331207036972046, + "rewards/margins": 0.37625521421432495, + "rewards/rejected": -1.7074623107910156, + "step": 5120 + }, + { + "epoch": 0.67, + "eval_logits/chosen": -2.307891845703125, + "eval_logits/rejected": -2.320222854614258, + "eval_logps/chosen": -467.59051513671875, + "eval_logps/rejected": -475.2369079589844, + "eval_loss": 0.6042217016220093, + "eval_rewards/accuracies": 0.671999990940094, + "eval_rewards/chosen": -1.348410725593567, + "eval_rewards/margins": 0.39285799860954285, + "eval_rewards/rejected": -1.7412687540054321, + "eval_runtime": 196.9823, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 5120 + }, + { + "epoch": 0.67, + "learning_rate": 1.4725259757803983e-06, + "logits/chosen": -2.6179652214050293, + "logits/rejected": -2.5962462425231934, + "logps/chosen": -518.4244995117188, + "logps/rejected": -508.89996337890625, + "loss": 0.5549, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.2763904333114624, + "rewards/margins": 0.5201258659362793, + "rewards/rejected": -1.7965164184570312, + "step": 5130 + }, + { + "epoch": 0.67, + "eval_logits/chosen": -2.306852340698242, + "eval_logits/rejected": -2.3192129135131836, + "eval_logps/chosen": -468.34869384765625, + "eval_logps/rejected": -476.2039489746094, + "eval_loss": 0.6043887734413147, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.3559925556182861, + "eval_rewards/margins": 0.39494654536247253, + "eval_rewards/rejected": -1.7509392499923706, + "eval_runtime": 197.059, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 5130 + }, + { + "epoch": 0.67, + "learning_rate": 1.4621236995852314e-06, + "logits/chosen": -2.6084470748901367, + "logits/rejected": -2.593048095703125, + "logps/chosen": -468.4862365722656, + "logps/rejected": -494.2373962402344, + "loss": 0.538, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.3402001857757568, + "rewards/margins": 0.5544275045394897, + "rewards/rejected": -1.8946278095245361, + "step": 5140 + }, + { + "epoch": 0.67, + "eval_logits/chosen": -2.3012852668762207, + "eval_logits/rejected": -2.313300132751465, + "eval_logps/chosen": -470.0094909667969, + "eval_logps/rejected": -478.3188171386719, + "eval_loss": 0.6046092510223389, + "eval_rewards/accuracies": 0.6694999933242798, + "eval_rewards/chosen": -1.372600793838501, + "eval_rewards/margins": 0.3994869589805603, + "eval_rewards/rejected": -1.772087812423706, + "eval_runtime": 197.0317, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 5140 + }, + { + "epoch": 0.67, + "learning_rate": 1.4517430890962337e-06, + "logits/chosen": -2.5578713417053223, + "logits/rejected": -2.462035894393921, + "logps/chosen": -484.83935546875, + "logps/rejected": -417.79461669921875, + "loss": 0.5572, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.3011386394500732, + "rewards/margins": 0.4909875988960266, + "rewards/rejected": -1.7921262979507446, + "step": 5150 + }, + { + "epoch": 0.67, + "eval_logits/chosen": -2.3006138801574707, + "eval_logits/rejected": -2.3119466304779053, + "eval_logps/chosen": -471.7322082519531, + "eval_logps/rejected": -480.45635986328125, + "eval_loss": 0.6042333245277405, + "eval_rewards/accuracies": 0.6704999804496765, + "eval_rewards/chosen": -1.389828085899353, + "eval_rewards/margins": 0.4036352038383484, + "eval_rewards/rejected": -1.7934633493423462, + "eval_runtime": 197.062, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 5150 + }, + { + "epoch": 0.68, + "learning_rate": 1.4413843610090342e-06, + "logits/chosen": -2.559861183166504, + "logits/rejected": -2.483541488647461, + "logps/chosen": -505.0181579589844, + "logps/rejected": -504.79815673828125, + "loss": 0.6035, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.4536011219024658, + "rewards/margins": 0.45139384269714355, + "rewards/rejected": -1.9049949645996094, + "step": 5160 + }, + { + "epoch": 0.68, + "eval_logits/chosen": -2.3040931224823, + "eval_logits/rejected": -2.3148891925811768, + "eval_logps/chosen": -473.65966796875, + "eval_logps/rejected": -482.7805480957031, + "eval_loss": 0.6035750508308411, + "eval_rewards/accuracies": 0.6710000038146973, + "eval_rewards/chosen": -1.4091025590896606, + "eval_rewards/margins": 0.4076029360294342, + "eval_rewards/rejected": -1.816705584526062, + "eval_runtime": 197.095, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.074, + "step": 5160 + }, + { + "epoch": 0.68, + "learning_rate": 1.4310477315624637e-06, + "logits/chosen": -2.513333797454834, + "logits/rejected": -2.5067684650421143, + "logps/chosen": -457.77783203125, + "logps/rejected": -470.79052734375, + "loss": 0.6602, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.455190896987915, + "rewards/margins": 0.28036192059516907, + "rewards/rejected": -1.7355530261993408, + "step": 5170 + }, + { + "epoch": 0.68, + "eval_logits/chosen": -2.308779001235962, + "eval_logits/rejected": -2.319303274154663, + "eval_logps/chosen": -469.47418212890625, + "eval_logps/rejected": -478.0995178222656, + "eval_loss": 0.6027604937553406, + "eval_rewards/accuracies": 0.6694999933242798, + "eval_rewards/chosen": -1.3672480583190918, + "eval_rewards/margins": 0.4026472270488739, + "eval_rewards/rejected": -1.769895315170288, + "eval_runtime": 197.0407, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 5170 + }, + { + "epoch": 0.68, + "learning_rate": 1.420733416534045e-06, + "logits/chosen": -2.38897442817688, + "logits/rejected": -2.3405518531799316, + "logps/chosen": -443.81549072265625, + "logps/rejected": -463.55303955078125, + "loss": 0.6586, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.3974109888076782, + "rewards/margins": 0.30270710587501526, + "rewards/rejected": -1.700118064880371, + "step": 5180 + }, + { + "epoch": 0.68, + "eval_logits/chosen": -2.3164751529693604, + "eval_logits/rejected": -2.327291488647461, + "eval_logps/chosen": -464.68194580078125, + "eval_logps/rejected": -472.7758483886719, + "eval_loss": 0.6024616360664368, + "eval_rewards/accuracies": 0.6694999933242798, + "eval_rewards/chosen": -1.3193248510360718, + "eval_rewards/margins": 0.3973331153392792, + "eval_rewards/rejected": -1.7166579961776733, + "eval_runtime": 197.0127, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 5180 + }, + { + "epoch": 0.68, + "learning_rate": 1.410441631235487e-06, + "logits/chosen": -2.5416388511657715, + "logits/rejected": -2.523131847381592, + "logps/chosen": -464.9375, + "logps/rejected": -487.29638671875, + "loss": 0.602, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.2932493686676025, + "rewards/margins": 0.3969052731990814, + "rewards/rejected": -1.6901544332504272, + "step": 5190 + }, + { + "epoch": 0.68, + "eval_logits/chosen": -2.320040464401245, + "eval_logits/rejected": -2.330761432647705, + "eval_logps/chosen": -463.5137634277344, + "eval_logps/rejected": -471.38946533203125, + "eval_loss": 0.6024397015571594, + "eval_rewards/accuracies": 0.6685000061988831, + "eval_rewards/chosen": -1.3076434135437012, + "eval_rewards/margins": 0.3951510787010193, + "eval_rewards/rejected": -1.7027945518493652, + "eval_runtime": 197.001, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 5190 + }, + { + "epoch": 0.68, + "learning_rate": 1.4001725905081868e-06, + "logits/chosen": -2.5292303562164307, + "logits/rejected": -2.511136531829834, + "logps/chosen": -422.4544982910156, + "logps/rejected": -407.1893310546875, + "loss": 0.5896, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -1.3296085596084595, + "rewards/margins": 0.3973914682865143, + "rewards/rejected": -1.7269999980926514, + "step": 5200 + }, + { + "epoch": 0.68, + "eval_logits/chosen": -2.3227202892303467, + "eval_logits/rejected": -2.333261251449585, + "eval_logps/chosen": -462.600830078125, + "eval_logps/rejected": -470.4217224121094, + "eval_loss": 0.6021357178688049, + "eval_rewards/accuracies": 0.6679999828338623, + "eval_rewards/chosen": -1.2985141277313232, + "eval_rewards/margins": 0.3946027457714081, + "eval_rewards/rejected": -1.6931169033050537, + "eval_runtime": 196.6463, + "eval_samples_per_second": 10.171, + "eval_steps_per_second": 5.085, + "step": 5200 + }, + { + "epoch": 0.68, + "learning_rate": 1.3899265087187507e-06, + "logits/chosen": -2.5664708614349365, + "logits/rejected": -2.5287675857543945, + "logps/chosen": -410.075439453125, + "logps/rejected": -426.7511291503906, + "loss": 0.5838, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.2405273914337158, + "rewards/margins": 0.39256519079208374, + "rewards/rejected": -1.6330926418304443, + "step": 5210 + }, + { + "epoch": 0.68, + "eval_logits/chosen": -2.322103977203369, + "eval_logits/rejected": -2.3327839374542236, + "eval_logps/chosen": -461.9039306640625, + "eval_logps/rejected": -469.6353759765625, + "eval_loss": 0.6023078560829163, + "eval_rewards/accuracies": 0.6694999933242798, + "eval_rewards/chosen": -1.2915451526641846, + "eval_rewards/margins": 0.39370810985565186, + "eval_rewards/rejected": -1.6852531433105469, + "eval_runtime": 196.8621, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.08, + "step": 5210 + }, + { + "epoch": 0.68, + "learning_rate": 1.3797035997545144e-06, + "logits/chosen": -2.5763635635375977, + "logits/rejected": -2.527101993560791, + "logps/chosen": -473.539794921875, + "logps/rejected": -478.39569091796875, + "loss": 0.5508, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.1910579204559326, + "rewards/margins": 0.48212796449661255, + "rewards/rejected": -1.67318594455719, + "step": 5220 + }, + { + "epoch": 0.68, + "eval_logits/chosen": -2.3128867149353027, + "eval_logits/rejected": -2.323371410369873, + "eval_logps/chosen": -464.56024169921875, + "eval_logps/rejected": -472.73760986328125, + "eval_loss": 0.6023849844932556, + "eval_rewards/accuracies": 0.6710000038146973, + "eval_rewards/chosen": -1.318108320236206, + "eval_rewards/margins": 0.3981679081916809, + "eval_rewards/rejected": -1.7162760496139526, + "eval_runtime": 196.9084, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 5220 + }, + { + "epoch": 0.68, + "learning_rate": 1.3695040770190816e-06, + "logits/chosen": -2.554281234741211, + "logits/rejected": -2.5586276054382324, + "logps/chosen": -431.57958984375, + "logps/rejected": -451.42913818359375, + "loss": 0.6031, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.2992688417434692, + "rewards/margins": 0.37253737449645996, + "rewards/rejected": -1.6718060970306396, + "step": 5230 + }, + { + "epoch": 0.68, + "eval_logits/chosen": -2.3034961223602295, + "eval_logits/rejected": -2.3134658336639404, + "eval_logps/chosen": -468.2120056152344, + "eval_logps/rejected": -476.7961730957031, + "eval_loss": 0.6026508808135986, + "eval_rewards/accuracies": 0.6690000295639038, + "eval_rewards/chosen": -1.354626178741455, + "eval_rewards/margins": 0.402235746383667, + "eval_rewards/rejected": -1.756861925125122, + "eval_runtime": 197.2352, + "eval_samples_per_second": 10.14, + "eval_steps_per_second": 5.07, + "step": 5230 + }, + { + "epoch": 0.69, + "learning_rate": 1.3593281534278651e-06, + "logits/chosen": -2.472536563873291, + "logits/rejected": -2.5110905170440674, + "logps/chosen": -414.9605407714844, + "logps/rejected": -476.60076904296875, + "loss": 0.5353, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.293400526046753, + "rewards/margins": 0.5274486541748047, + "rewards/rejected": -1.8208494186401367, + "step": 5240 + }, + { + "epoch": 0.69, + "eval_logits/chosen": -2.300142526626587, + "eval_logits/rejected": -2.3098344802856445, + "eval_logps/chosen": -471.1337585449219, + "eval_logps/rejected": -479.9906005859375, + "eval_loss": 0.6028639078140259, + "eval_rewards/accuracies": 0.6704999804496765, + "eval_rewards/chosen": -1.3838437795639038, + "eval_rewards/margins": 0.4049619436264038, + "eval_rewards/rejected": -1.7888059616088867, + "eval_runtime": 197.0056, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 5240 + }, + { + "epoch": 0.69, + "learning_rate": 1.3491760414036478e-06, + "logits/chosen": -2.4985485076904297, + "logits/rejected": -2.4522864818573, + "logps/chosen": -497.62725830078125, + "logps/rejected": -464.302978515625, + "loss": 0.6128, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.3614141941070557, + "rewards/margins": 0.38570067286491394, + "rewards/rejected": -1.7471147775650024, + "step": 5250 + }, + { + "epoch": 0.69, + "eval_logits/chosen": -2.2994742393493652, + "eval_logits/rejected": -2.3090596199035645, + "eval_logps/chosen": -471.99444580078125, + "eval_logps/rejected": -481.0536193847656, + "eval_loss": 0.6028826832771301, + "eval_rewards/accuracies": 0.6710000038146973, + "eval_rewards/chosen": -1.3924506902694702, + "eval_rewards/margins": 0.4069855213165283, + "eval_rewards/rejected": -1.799436330795288, + "eval_runtime": 197.0298, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 5250 + }, + { + "epoch": 0.69, + "learning_rate": 1.3390479528721444e-06, + "logits/chosen": -2.4176363945007324, + "logits/rejected": -2.4405970573425293, + "logps/chosen": -457.1136779785156, + "logps/rejected": -496.7222595214844, + "loss": 0.6085, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.4070097208023071, + "rewards/margins": 0.40776365995407104, + "rewards/rejected": -1.8147733211517334, + "step": 5260 + }, + { + "epoch": 0.69, + "eval_logits/chosen": -2.300313711166382, + "eval_logits/rejected": -2.3098363876342773, + "eval_logps/chosen": -472.0662536621094, + "eval_logps/rejected": -481.25726318359375, + "eval_loss": 0.6027334928512573, + "eval_rewards/accuracies": 0.6664999723434448, + "eval_rewards/chosen": -1.393168330192566, + "eval_rewards/margins": 0.4083041250705719, + "eval_rewards/rejected": -1.8014723062515259, + "eval_runtime": 197.0593, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 5260 + }, + { + "epoch": 0.69, + "learning_rate": 1.3289440992575756e-06, + "logits/chosen": -2.5740933418273926, + "logits/rejected": -2.5310654640197754, + "logps/chosen": -502.00408935546875, + "logps/rejected": -504.9661560058594, + "loss": 0.567, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.3182388544082642, + "rewards/margins": 0.45366114377975464, + "rewards/rejected": -1.771899938583374, + "step": 5270 + }, + { + "epoch": 0.69, + "eval_logits/chosen": -2.2995877265930176, + "eval_logits/rejected": -2.309088706970215, + "eval_logps/chosen": -471.35662841796875, + "eval_logps/rejected": -480.6314392089844, + "eval_loss": 0.6026535034179688, + "eval_rewards/accuracies": 0.6685000061988831, + "eval_rewards/chosen": -1.386072039604187, + "eval_rewards/margins": 0.40914198756217957, + "eval_rewards/rejected": -1.795214056968689, + "eval_runtime": 196.9223, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 5270 + }, + { + "epoch": 0.69, + "learning_rate": 1.3188646914782616e-06, + "logits/chosen": -2.597381353378296, + "logits/rejected": -2.5523602962493896, + "logps/chosen": -549.8211669921875, + "logps/rejected": -480.4268493652344, + "loss": 0.5273, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.4053056240081787, + "rewards/margins": 0.5496398210525513, + "rewards/rejected": -1.9549453258514404, + "step": 5280 + }, + { + "epoch": 0.69, + "eval_logits/chosen": -2.29966402053833, + "eval_logits/rejected": -2.3090415000915527, + "eval_logps/chosen": -472.23272705078125, + "eval_logps/rejected": -481.6187438964844, + "eval_loss": 0.6027253270149231, + "eval_rewards/accuracies": 0.6690000295639038, + "eval_rewards/chosen": -1.3948334455490112, + "eval_rewards/margins": 0.41025370359420776, + "eval_rewards/rejected": -1.8050872087478638, + "eval_runtime": 196.8822, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 5280 + }, + { + "epoch": 0.69, + "learning_rate": 1.3088099399422109e-06, + "logits/chosen": -2.586010456085205, + "logits/rejected": -2.5378670692443848, + "logps/chosen": -488.80267333984375, + "logps/rejected": -491.5059509277344, + "loss": 0.6205, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.334667682647705, + "rewards/margins": 0.39315497875213623, + "rewards/rejected": -1.7278226613998413, + "step": 5290 + }, + { + "epoch": 0.69, + "eval_logits/chosen": -2.3015239238739014, + "eval_logits/rejected": -2.310614824295044, + "eval_logps/chosen": -471.9217834472656, + "eval_logps/rejected": -481.25775146484375, + "eval_loss": 0.6026984453201294, + "eval_rewards/accuracies": 0.671999990940094, + "eval_rewards/chosen": -1.391723871231079, + "eval_rewards/margins": 0.40975335240364075, + "eval_rewards/rejected": -1.8014771938323975, + "eval_runtime": 196.9474, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 5290 + }, + { + "epoch": 0.69, + "learning_rate": 1.2987800545427353e-06, + "logits/chosen": -2.566490650177002, + "logits/rejected": -2.4875643253326416, + "logps/chosen": -482.4261169433594, + "logps/rejected": -494.2449645996094, + "loss": 0.5601, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.377469778060913, + "rewards/margins": 0.5372947454452515, + "rewards/rejected": -1.914764404296875, + "step": 5300 + }, + { + "epoch": 0.69, + "eval_logits/chosen": -2.3011436462402344, + "eval_logits/rejected": -2.3103692531585693, + "eval_logps/chosen": -471.6266174316406, + "eval_logps/rejected": -481.00421142578125, + "eval_loss": 0.6028599739074707, + "eval_rewards/accuracies": 0.6700000166893005, + "eval_rewards/chosen": -1.3887721300125122, + "eval_rewards/margins": 0.4101700484752655, + "eval_rewards/rejected": -1.7989420890808105, + "eval_runtime": 196.6641, + "eval_samples_per_second": 10.17, + "eval_steps_per_second": 5.085, + "step": 5300 + }, + { + "epoch": 0.69, + "learning_rate": 1.288775244654062e-06, + "logits/chosen": -2.5995917320251465, + "logits/rejected": -2.578198194503784, + "logps/chosen": -530.6082153320312, + "logps/rejected": -501.57861328125, + "loss": 0.6486, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.3748692274093628, + "rewards/margins": 0.3246624767780304, + "rewards/rejected": -1.6995317935943604, + "step": 5310 + }, + { + "epoch": 0.69, + "eval_logits/chosen": -2.303572654724121, + "eval_logits/rejected": -2.3130619525909424, + "eval_logps/chosen": -470.4731140136719, + "eval_logps/rejected": -479.8280334472656, + "eval_loss": 0.6028394103050232, + "eval_rewards/accuracies": 0.6694999933242798, + "eval_rewards/chosen": -1.3772375583648682, + "eval_rewards/margins": 0.4099426567554474, + "eval_rewards/rejected": -1.7871803045272827, + "eval_runtime": 196.8546, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 5310 + }, + { + "epoch": 0.7, + "learning_rate": 1.2787957191269696e-06, + "logits/chosen": -2.4609122276306152, + "logits/rejected": -2.4693045616149902, + "logps/chosen": -468.830322265625, + "logps/rejected": -495.522216796875, + "loss": 0.6643, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.389495611190796, + "rewards/margins": 0.28900545835494995, + "rewards/rejected": -1.6785008907318115, + "step": 5320 + }, + { + "epoch": 0.7, + "eval_logits/chosen": -2.305281400680542, + "eval_logits/rejected": -2.3151094913482666, + "eval_logps/chosen": -466.90655517578125, + "eval_logps/rejected": -475.7928771972656, + "eval_loss": 0.6025946140289307, + "eval_rewards/accuracies": 0.6685000061988831, + "eval_rewards/chosen": -1.341571569442749, + "eval_rewards/margins": 0.4052570164203644, + "eval_rewards/rejected": -1.7468284368515015, + "eval_runtime": 196.8357, + "eval_samples_per_second": 10.161, + "eval_steps_per_second": 5.08, + "step": 5320 + }, + { + "epoch": 0.7, + "learning_rate": 1.2688416862844193e-06, + "logits/chosen": -2.4436516761779785, + "logits/rejected": -2.497119426727295, + "logps/chosen": -410.174072265625, + "logps/rejected": -484.532470703125, + "loss": 0.5421, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.1837115287780762, + "rewards/margins": 0.5746269822120667, + "rewards/rejected": -1.7583385705947876, + "step": 5330 + }, + { + "epoch": 0.7, + "eval_logits/chosen": -2.3095591068267822, + "eval_logits/rejected": -2.319445848464966, + "eval_logps/chosen": -464.5736083984375, + "eval_logps/rejected": -473.2466125488281, + "eval_loss": 0.6024113893508911, + "eval_rewards/accuracies": 0.671500027179718, + "eval_rewards/chosen": -1.3182419538497925, + "eval_rewards/margins": 0.40312403440475464, + "eval_rewards/rejected": -1.7213659286499023, + "eval_runtime": 196.7266, + "eval_samples_per_second": 10.166, + "eval_steps_per_second": 5.083, + "step": 5330 + }, + { + "epoch": 0.7, + "learning_rate": 1.2589133539172193e-06, + "logits/chosen": -2.6252217292785645, + "logits/rejected": -2.5867104530334473, + "logps/chosen": -479.29510498046875, + "logps/rejected": -485.63214111328125, + "loss": 0.5113, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.0865707397460938, + "rewards/margins": 0.601614773273468, + "rewards/rejected": -1.688185453414917, + "step": 5340 + }, + { + "epoch": 0.7, + "eval_logits/chosen": -2.302642345428467, + "eval_logits/rejected": -2.312276840209961, + "eval_logps/chosen": -468.6505432128906, + "eval_logps/rejected": -478.03350830078125, + "eval_loss": 0.602563738822937, + "eval_rewards/accuracies": 0.6690000295639038, + "eval_rewards/chosen": -1.3590114116668701, + "eval_rewards/margins": 0.4102230370044708, + "eval_rewards/rejected": -1.7692344188690186, + "eval_runtime": 196.8303, + "eval_samples_per_second": 10.161, + "eval_steps_per_second": 5.081, + "step": 5340 + }, + { + "epoch": 0.7, + "learning_rate": 1.249010929279672e-06, + "logits/chosen": -2.6182000637054443, + "logits/rejected": -2.5885214805603027, + "logps/chosen": -475.0232849121094, + "logps/rejected": -491.6460876464844, + "loss": 0.6035, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.3365066051483154, + "rewards/margins": 0.3971422016620636, + "rewards/rejected": -1.7336488962173462, + "step": 5350 + }, + { + "epoch": 0.7, + "eval_logits/chosen": -2.3038649559020996, + "eval_logits/rejected": -2.3131213188171387, + "eval_logps/chosen": -471.2071228027344, + "eval_logps/rejected": -480.9702453613281, + "eval_loss": 0.6023095846176147, + "eval_rewards/accuracies": 0.6664999723434448, + "eval_rewards/chosen": -1.3845771551132202, + "eval_rewards/margins": 0.41402512788772583, + "eval_rewards/rejected": -1.7986023426055908, + "eval_runtime": 196.8981, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 5350 + }, + { + "epoch": 0.7, + "learning_rate": 1.2391346190852603e-06, + "logits/chosen": -2.604792833328247, + "logits/rejected": -2.582808017730713, + "logps/chosen": -467.715087890625, + "logps/rejected": -480.80731201171875, + "loss": 0.624, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.4067754745483398, + "rewards/margins": 0.4088035225868225, + "rewards/rejected": -1.8155790567398071, + "step": 5360 + }, + { + "epoch": 0.7, + "eval_logits/chosen": -2.2990853786468506, + "eval_logits/rejected": -2.308011293411255, + "eval_logps/chosen": -474.9680480957031, + "eval_logps/rejected": -485.0401916503906, + "eval_loss": 0.6023436188697815, + "eval_rewards/accuracies": 0.6675000190734863, + "eval_rewards/chosen": -1.4221864938735962, + "eval_rewards/margins": 0.4171146750450134, + "eval_rewards/rejected": -1.8393012285232544, + "eval_runtime": 196.9161, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.078, + "step": 5360 + }, + { + "epoch": 0.7, + "learning_rate": 1.2292846295023222e-06, + "logits/chosen": -2.5381789207458496, + "logits/rejected": -2.5520262718200684, + "logps/chosen": -516.1911010742188, + "logps/rejected": -499.8275451660156, + "loss": 0.6991, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": -1.5526403188705444, + "rewards/margins": 0.15553751587867737, + "rewards/rejected": -1.708177924156189, + "step": 5370 + }, + { + "epoch": 0.7, + "eval_logits/chosen": -2.297722578048706, + "eval_logits/rejected": -2.3069052696228027, + "eval_logps/chosen": -473.8943786621094, + "eval_logps/rejected": -483.74273681640625, + "eval_loss": 0.6021169424057007, + "eval_rewards/accuracies": 0.6679999828338623, + "eval_rewards/chosen": -1.4114493131637573, + "eval_rewards/margins": 0.4148778021335602, + "eval_rewards/rejected": -1.8263272047042847, + "eval_runtime": 196.8862, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 5370 + }, + { + "epoch": 0.7, + "learning_rate": 1.2194611661497576e-06, + "logits/chosen": -2.432284355163574, + "logits/rejected": -2.4482924938201904, + "logps/chosen": -470.35955810546875, + "logps/rejected": -488.89324951171875, + "loss": 0.6203, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.4455724954605103, + "rewards/margins": 0.3604838252067566, + "rewards/rejected": -1.8060563802719116, + "step": 5380 + }, + { + "epoch": 0.7, + "eval_logits/chosen": -2.299294948577881, + "eval_logits/rejected": -2.308401346206665, + "eval_logps/chosen": -474.36767578125, + "eval_logps/rejected": -484.0581970214844, + "eval_loss": 0.6022467613220215, + "eval_rewards/accuracies": 0.6685000061988831, + "eval_rewards/chosen": -1.4161828756332397, + "eval_rewards/margins": 0.4132993519306183, + "eval_rewards/rejected": -1.829482078552246, + "eval_runtime": 197.0801, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 5380 + }, + { + "epoch": 0.71, + "learning_rate": 1.2096644340927247e-06, + "logits/chosen": -2.5367202758789062, + "logits/rejected": -2.546861171722412, + "logps/chosen": -488.73858642578125, + "logps/rejected": -516.7587890625, + "loss": 0.5684, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.3687140941619873, + "rewards/margins": 0.5021928548812866, + "rewards/rejected": -1.8709068298339844, + "step": 5390 + }, + { + "epoch": 0.71, + "eval_logits/chosen": -2.299215316772461, + "eval_logits/rejected": -2.3083655834198, + "eval_logps/chosen": -474.4788818359375, + "eval_logps/rejected": -484.1181640625, + "eval_loss": 0.6022253632545471, + "eval_rewards/accuracies": 0.6704999804496765, + "eval_rewards/chosen": -1.4172947406768799, + "eval_rewards/margins": 0.412786602973938, + "eval_rewards/rejected": -1.8300813436508179, + "eval_runtime": 197.0778, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 5390 + }, + { + "epoch": 0.71, + "learning_rate": 1.19989463783837e-06, + "logits/chosen": -2.606667995452881, + "logits/rejected": -2.5329880714416504, + "logps/chosen": -507.55950927734375, + "logps/rejected": -529.9376220703125, + "loss": 0.5489, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.3215059041976929, + "rewards/margins": 0.5542451739311218, + "rewards/rejected": -1.8757511377334595, + "step": 5400 + }, + { + "epoch": 0.71, + "eval_logits/chosen": -2.3006458282470703, + "eval_logits/rejected": -2.3096537590026855, + "eval_logps/chosen": -475.47930908203125, + "eval_logps/rejected": -485.2168273925781, + "eval_loss": 0.6021424531936646, + "eval_rewards/accuracies": 0.6700000166893005, + "eval_rewards/chosen": -1.4272990226745605, + "eval_rewards/margins": 0.4137687385082245, + "eval_rewards/rejected": -1.841067910194397, + "eval_runtime": 196.93, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 5400 + }, + { + "epoch": 0.71, + "learning_rate": 1.1901519813315495e-06, + "logits/chosen": -2.4493112564086914, + "logits/rejected": -2.4181106090545654, + "logps/chosen": -454.238525390625, + "logps/rejected": -464.66278076171875, + "loss": 0.6004, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.4211333990097046, + "rewards/margins": 0.36835595965385437, + "rewards/rejected": -1.7894893884658813, + "step": 5410 + }, + { + "epoch": 0.71, + "eval_logits/chosen": -2.298659563064575, + "eval_logits/rejected": -2.3075058460235596, + "eval_logps/chosen": -477.7009582519531, + "eval_logps/rejected": -487.77276611328125, + "eval_loss": 0.6020307540893555, + "eval_rewards/accuracies": 0.6704999804496765, + "eval_rewards/chosen": -1.4495152235031128, + "eval_rewards/margins": 0.41711264848709106, + "eval_rewards/rejected": -1.8666279315948486, + "eval_runtime": 196.8669, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.08, + "step": 5410 + }, + { + "epoch": 0.71, + "learning_rate": 1.1804366679505798e-06, + "logits/chosen": -2.4779162406921387, + "logits/rejected": -2.447110891342163, + "logps/chosen": -510.69012451171875, + "logps/rejected": -487.569580078125, + "loss": 0.5903, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.481211543083191, + "rewards/margins": 0.45118799805641174, + "rewards/rejected": -1.9323995113372803, + "step": 5420 + }, + { + "epoch": 0.71, + "eval_logits/chosen": -2.298388719558716, + "eval_logits/rejected": -2.3070218563079834, + "eval_logps/chosen": -479.8644714355469, + "eval_logps/rejected": -490.0103454589844, + "eval_loss": 0.6022910475730896, + "eval_rewards/accuracies": 0.6710000038146973, + "eval_rewards/chosen": -1.4711503982543945, + "eval_rewards/margins": 0.4178526699542999, + "eval_rewards/rejected": -1.889003038406372, + "eval_runtime": 197.0663, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.074, + "step": 5420 + }, + { + "epoch": 0.71, + "learning_rate": 1.1707489005029877e-06, + "logits/chosen": -2.521374225616455, + "logits/rejected": -2.524177074432373, + "logps/chosen": -473.6175842285156, + "logps/rejected": -499.723876953125, + "loss": 0.6109, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.4926834106445312, + "rewards/margins": 0.5293210744857788, + "rewards/rejected": -2.0220046043395996, + "step": 5430 + }, + { + "epoch": 0.71, + "eval_logits/chosen": -2.296948194503784, + "eval_logits/rejected": -2.3053503036499023, + "eval_logps/chosen": -481.6418151855469, + "eval_logps/rejected": -492.03350830078125, + "eval_loss": 0.6023349165916443, + "eval_rewards/accuracies": 0.671999990940094, + "eval_rewards/chosen": -1.4889241456985474, + "eval_rewards/margins": 0.4203101098537445, + "eval_rewards/rejected": -1.9092342853546143, + "eval_runtime": 196.8203, + "eval_samples_per_second": 10.162, + "eval_steps_per_second": 5.081, + "step": 5430 + }, + { + "epoch": 0.71, + "learning_rate": 1.1610888812212749e-06, + "logits/chosen": -2.4720962047576904, + "logits/rejected": -2.4360768795013428, + "logps/chosen": -490.5087890625, + "logps/rejected": -482.7325134277344, + "loss": 0.6214, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.5258370637893677, + "rewards/margins": 0.32655078172683716, + "rewards/rejected": -1.8523876667022705, + "step": 5440 + }, + { + "epoch": 0.71, + "eval_logits/chosen": -2.2956690788269043, + "eval_logits/rejected": -2.3042073249816895, + "eval_logps/chosen": -481.92974853515625, + "eval_logps/rejected": -492.52020263671875, + "eval_loss": 0.6022093892097473, + "eval_rewards/accuracies": 0.671999990940094, + "eval_rewards/chosen": -1.4918036460876465, + "eval_rewards/margins": 0.4222985506057739, + "eval_rewards/rejected": -1.91410231590271, + "eval_runtime": 196.9069, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 5440 + }, + { + "epoch": 0.71, + "learning_rate": 1.1514568117587035e-06, + "logits/chosen": -2.538889169692993, + "logits/rejected": -2.563322067260742, + "logps/chosen": -498.38079833984375, + "logps/rejected": -502.63726806640625, + "loss": 0.6564, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.6392685174942017, + "rewards/margins": 0.24687933921813965, + "rewards/rejected": -1.8861478567123413, + "step": 5450 + }, + { + "epoch": 0.71, + "eval_logits/chosen": -2.2915148735046387, + "eval_logits/rejected": -2.30027437210083, + "eval_logps/chosen": -482.7044372558594, + "eval_logps/rejected": -493.3853759765625, + "eval_loss": 0.6024051308631897, + "eval_rewards/accuracies": 0.6694999933242798, + "eval_rewards/chosen": -1.4995503425598145, + "eval_rewards/margins": 0.4232032299041748, + "eval_rewards/rejected": -1.9227536916732788, + "eval_runtime": 196.9898, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.076, + "step": 5450 + }, + { + "epoch": 0.71, + "learning_rate": 1.1418528931850781e-06, + "logits/chosen": -2.5654962062835693, + "logits/rejected": -2.4673209190368652, + "logps/chosen": -489.5174865722656, + "logps/rejected": -485.2567443847656, + "loss": 0.5649, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.4588682651519775, + "rewards/margins": 0.5729068517684937, + "rewards/rejected": -2.0317752361297607, + "step": 5460 + }, + { + "epoch": 0.71, + "eval_logits/chosen": -2.287652015686035, + "eval_logits/rejected": -2.2963643074035645, + "eval_logps/chosen": -483.6240234375, + "eval_logps/rejected": -494.3249206542969, + "eval_loss": 0.6025742888450623, + "eval_rewards/accuracies": 0.671500027179718, + "eval_rewards/chosen": -1.5087462663650513, + "eval_rewards/margins": 0.4234027564525604, + "eval_rewards/rejected": -1.9321489334106445, + "eval_runtime": 197.2711, + "eval_samples_per_second": 10.138, + "eval_steps_per_second": 5.069, + "step": 5460 + }, + { + "epoch": 0.72, + "learning_rate": 1.1322773259825563e-06, + "logits/chosen": -2.49501371383667, + "logits/rejected": -2.4475762844085693, + "logps/chosen": -479.83837890625, + "logps/rejected": -441.84918212890625, + "loss": 0.5814, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.4474797248840332, + "rewards/margins": 0.4089323580265045, + "rewards/rejected": -1.8564122915267944, + "step": 5470 + }, + { + "epoch": 0.72, + "eval_logits/chosen": -2.282066822052002, + "eval_logits/rejected": -2.2901947498321533, + "eval_logps/chosen": -486.34967041015625, + "eval_logps/rejected": -497.11529541015625, + "eval_loss": 0.602572500705719, + "eval_rewards/accuracies": 0.6700000166893005, + "eval_rewards/chosen": -1.536002278327942, + "eval_rewards/margins": 0.42405039072036743, + "eval_rewards/rejected": -1.9600528478622437, + "eval_runtime": 197.1192, + "eval_samples_per_second": 10.146, + "eval_steps_per_second": 5.073, + "step": 5470 + }, + { + "epoch": 0.72, + "learning_rate": 1.1227303100414552e-06, + "logits/chosen": -2.4446446895599365, + "logits/rejected": -2.4898505210876465, + "logps/chosen": -435.85498046875, + "logps/rejected": -499.18853759765625, + "loss": 0.5398, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.460822582244873, + "rewards/margins": 0.5744005441665649, + "rewards/rejected": -2.0352234840393066, + "step": 5480 + }, + { + "epoch": 0.72, + "eval_logits/chosen": -2.2764456272125244, + "eval_logits/rejected": -2.2844159603118896, + "eval_logps/chosen": -489.5582275390625, + "eval_logps/rejected": -500.5900573730469, + "eval_loss": 0.6028984785079956, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.5680886507034302, + "eval_rewards/margins": 0.4267115294933319, + "eval_rewards/rejected": -1.994800090789795, + "eval_runtime": 197.1388, + "eval_samples_per_second": 10.145, + "eval_steps_per_second": 5.073, + "step": 5480 + }, + { + "epoch": 0.72, + "learning_rate": 1.113212044656087e-06, + "logits/chosen": -2.4338154792785645, + "logits/rejected": -2.4598872661590576, + "logps/chosen": -453.0787048339844, + "logps/rejected": -505.65850830078125, + "loss": 0.6122, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.5571606159210205, + "rewards/margins": 0.4209592342376709, + "rewards/rejected": -1.9781198501586914, + "step": 5490 + }, + { + "epoch": 0.72, + "eval_logits/chosen": -2.277843713760376, + "eval_logits/rejected": -2.285881996154785, + "eval_logps/chosen": -490.07916259765625, + "eval_logps/rejected": -501.286376953125, + "eval_loss": 0.6028754711151123, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.573297381401062, + "eval_rewards/margins": 0.42846596240997314, + "eval_rewards/rejected": -2.001763343811035, + "eval_runtime": 197.1379, + "eval_samples_per_second": 10.145, + "eval_steps_per_second": 5.073, + "step": 5490 + }, + { + "epoch": 0.72, + "learning_rate": 1.1037227285205951e-06, + "logits/chosen": -2.3397364616394043, + "logits/rejected": -2.409205675125122, + "logps/chosen": -492.1761779785156, + "logps/rejected": -530.21923828125, + "loss": 0.6479, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.6842491626739502, + "rewards/margins": 0.386981338262558, + "rewards/rejected": -2.07123064994812, + "step": 5500 + }, + { + "epoch": 0.72, + "eval_logits/chosen": -2.2736318111419678, + "eval_logits/rejected": -2.2816479206085205, + "eval_logps/chosen": -492.9660339355469, + "eval_logps/rejected": -504.3878173828125, + "eval_loss": 0.6026748418807983, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.6021665334701538, + "eval_rewards/margins": 0.4306114614009857, + "eval_rewards/rejected": -2.032778024673462, + "eval_runtime": 197.0317, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 5500 + }, + { + "epoch": 0.72, + "learning_rate": 1.0942625597248028e-06, + "logits/chosen": -2.430037021636963, + "logits/rejected": -2.4081974029541016, + "logps/chosen": -472.9207458496094, + "logps/rejected": -475.80767822265625, + "loss": 0.5748, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.574325442314148, + "rewards/margins": 0.5580999255180359, + "rewards/rejected": -2.132425308227539, + "step": 5510 + }, + { + "epoch": 0.72, + "eval_logits/chosen": -2.2735891342163086, + "eval_logits/rejected": -2.2813940048217773, + "eval_logps/chosen": -494.952392578125, + "eval_logps/rejected": -506.3679504394531, + "eval_loss": 0.6027331948280334, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.6220301389694214, + "eval_rewards/margins": 0.43054893612861633, + "eval_rewards/rejected": -2.052579164505005, + "eval_runtime": 197.0977, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.074, + "step": 5510 + }, + { + "epoch": 0.72, + "learning_rate": 1.0848317357500854e-06, + "logits/chosen": -2.406419277191162, + "logits/rejected": -2.399305582046509, + "logps/chosen": -533.1932983398438, + "logps/rejected": -488.5306091308594, + "loss": 0.6292, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.7428977489471436, + "rewards/margins": 0.3303782641887665, + "rewards/rejected": -2.0732760429382324, + "step": 5520 + }, + { + "epoch": 0.72, + "eval_logits/chosen": -2.2739861011505127, + "eval_logits/rejected": -2.2815887928009033, + "eval_logps/chosen": -495.77471923828125, + "eval_logps/rejected": -507.1916809082031, + "eval_loss": 0.6024631261825562, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.6302530765533447, + "eval_rewards/margins": 0.43056365847587585, + "eval_rewards/rejected": -2.060816764831543, + "eval_runtime": 197.3728, + "eval_samples_per_second": 10.133, + "eval_steps_per_second": 5.067, + "step": 5520 + }, + { + "epoch": 0.72, + "learning_rate": 1.0754304534652404e-06, + "logits/chosen": -2.475829601287842, + "logits/rejected": -2.5434672832489014, + "logps/chosen": -475.5762634277344, + "logps/rejected": -535.7167358398438, + "loss": 0.6393, + "rewards/accuracies": 0.5625, + "rewards/chosen": -1.6018517017364502, + "rewards/margins": 0.27932238578796387, + "rewards/rejected": -1.881174087524414, + "step": 5530 + }, + { + "epoch": 0.72, + "eval_logits/chosen": -2.278412342071533, + "eval_logits/rejected": -2.286480665206909, + "eval_logps/chosen": -490.4624938964844, + "eval_logps/rejected": -501.47857666015625, + "eval_loss": 0.6020786166191101, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.577130913734436, + "eval_rewards/margins": 0.4265541732311249, + "eval_rewards/rejected": -2.0036849975585938, + "eval_runtime": 197.0737, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 5530 + }, + { + "epoch": 0.72, + "learning_rate": 1.0660589091223854e-06, + "logits/chosen": -2.4679157733917236, + "logits/rejected": -2.430014133453369, + "logps/chosen": -423.26025390625, + "logps/rejected": -465.1514587402344, + "loss": 0.5557, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.5359132289886475, + "rewards/margins": 0.5502170920372009, + "rewards/rejected": -2.086129903793335, + "step": 5540 + }, + { + "epoch": 0.72, + "eval_logits/chosen": -2.2755799293518066, + "eval_logits/rejected": -2.2840092182159424, + "eval_logps/chosen": -489.9895935058594, + "eval_logps/rejected": -501.1238098144531, + "eval_loss": 0.6020728349685669, + "eval_rewards/accuracies": 0.671500027179718, + "eval_rewards/chosen": -1.572401762008667, + "eval_rewards/margins": 0.4277363419532776, + "eval_rewards/rejected": -2.000138282775879, + "eval_runtime": 196.8328, + "eval_samples_per_second": 10.161, + "eval_steps_per_second": 5.08, + "step": 5540 + }, + { + "epoch": 0.73, + "learning_rate": 1.0567172983528534e-06, + "logits/chosen": -2.4794580936431885, + "logits/rejected": -2.463869571685791, + "logps/chosen": -414.53021240234375, + "logps/rejected": -453.79705810546875, + "loss": 0.5549, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.4582284688949585, + "rewards/margins": 0.5179659724235535, + "rewards/rejected": -1.9761943817138672, + "step": 5550 + }, + { + "epoch": 0.73, + "eval_logits/chosen": -2.271183967590332, + "eval_logits/rejected": -2.2797226905822754, + "eval_logps/chosen": -490.1856384277344, + "eval_logps/rejected": -501.3836975097656, + "eval_loss": 0.6021662950515747, + "eval_rewards/accuracies": 0.6694999933242798, + "eval_rewards/chosen": -1.574361801147461, + "eval_rewards/margins": 0.4283748269081116, + "eval_rewards/rejected": -2.002736806869507, + "eval_runtime": 197.0997, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.074, + "step": 5550 + }, + { + "epoch": 0.73, + "learning_rate": 1.0474058161631168e-06, + "logits/chosen": -2.5028529167175293, + "logits/rejected": -2.4594624042510986, + "logps/chosen": -553.6297607421875, + "logps/rejected": -557.9371337890625, + "loss": 0.6443, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -1.6656099557876587, + "rewards/margins": 0.32611754536628723, + "rewards/rejected": -1.9917274713516235, + "step": 5560 + }, + { + "epoch": 0.73, + "eval_logits/chosen": -2.270707845687866, + "eval_logits/rejected": -2.279705047607422, + "eval_logps/chosen": -488.2053527832031, + "eval_logps/rejected": -499.2417907714844, + "eval_loss": 0.6019599437713623, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.554559350013733, + "eval_rewards/margins": 0.42675837874412537, + "eval_rewards/rejected": -1.9813178777694702, + "eval_runtime": 197.2341, + "eval_samples_per_second": 10.14, + "eval_steps_per_second": 5.07, + "step": 5560 + }, + { + "epoch": 0.73, + "learning_rate": 1.0381246569307077e-06, + "logits/chosen": -2.548515796661377, + "logits/rejected": -2.5208840370178223, + "logps/chosen": -537.696044921875, + "logps/rejected": -528.3955688476562, + "loss": 0.6073, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.6977163553237915, + "rewards/margins": 0.3686388432979584, + "rewards/rejected": -2.0663552284240723, + "step": 5570 + }, + { + "epoch": 0.73, + "eval_logits/chosen": -2.273380756378174, + "eval_logits/rejected": -2.282517194747925, + "eval_logps/chosen": -486.97906494140625, + "eval_logps/rejected": -497.864013671875, + "eval_loss": 0.6022564768791199, + "eval_rewards/accuracies": 0.6710000038146973, + "eval_rewards/chosen": -1.5422965288162231, + "eval_rewards/margins": 0.4252430200576782, + "eval_rewards/rejected": -1.967539668083191, + "eval_runtime": 197.1563, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 5570 + }, + { + "epoch": 0.73, + "learning_rate": 1.0288740144001722e-06, + "logits/chosen": -2.544621229171753, + "logits/rejected": -2.495824098587036, + "logps/chosen": -473.4520568847656, + "logps/rejected": -455.8770446777344, + "loss": 0.6376, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.4984050989151, + "rewards/margins": 0.3813208043575287, + "rewards/rejected": -1.8797260522842407, + "step": 5580 + }, + { + "epoch": 0.73, + "eval_logits/chosen": -2.277961492538452, + "eval_logits/rejected": -2.287504196166992, + "eval_logps/chosen": -484.2668151855469, + "eval_logps/rejected": -494.9880676269531, + "eval_loss": 0.602099597454071, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.515174150466919, + "eval_rewards/margins": 0.42360609769821167, + "eval_rewards/rejected": -1.9387801885604858, + "eval_runtime": 197.048, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 5580 + }, + { + "epoch": 0.73, + "learning_rate": 1.0196540816790127e-06, + "logits/chosen": -2.4399209022521973, + "logits/rejected": -2.40258526802063, + "logps/chosen": -455.9623107910156, + "logps/rejected": -428.95220947265625, + "loss": 0.6265, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.552139163017273, + "rewards/margins": 0.3237138092517853, + "rewards/rejected": -1.8758528232574463, + "step": 5590 + }, + { + "epoch": 0.73, + "eval_logits/chosen": -2.283484697341919, + "eval_logits/rejected": -2.2933106422424316, + "eval_logps/chosen": -481.48687744140625, + "eval_logps/rejected": -491.9718017578125, + "eval_loss": 0.6015101075172424, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.4873746633529663, + "eval_rewards/margins": 0.421243280172348, + "eval_rewards/rejected": -1.9086179733276367, + "eval_runtime": 197.2622, + "eval_samples_per_second": 10.139, + "eval_steps_per_second": 5.069, + "step": 5590 + }, + { + "epoch": 0.73, + "learning_rate": 1.0104650512336679e-06, + "logits/chosen": -2.6136372089385986, + "logits/rejected": -2.5906195640563965, + "logps/chosen": -479.2579650878906, + "logps/rejected": -477.80609130859375, + "loss": 0.6676, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4934594631195068, + "rewards/margins": 0.5157946944236755, + "rewards/rejected": -2.009254217147827, + "step": 5600 + }, + { + "epoch": 0.73, + "eval_logits/chosen": -2.2854361534118652, + "eval_logits/rejected": -2.2954437732696533, + "eval_logps/chosen": -479.8711242675781, + "eval_logps/rejected": -490.2027282714844, + "eval_loss": 0.6013363599777222, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.4712170362472534, + "eval_rewards/margins": 0.4197098910808563, + "eval_rewards/rejected": -1.8909268379211426, + "eval_runtime": 197.0989, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.074, + "step": 5600 + }, + { + "epoch": 0.73, + "learning_rate": 1.0013071148854861e-06, + "logits/chosen": -2.4359683990478516, + "logits/rejected": -2.471727132797241, + "logps/chosen": -435.45135498046875, + "logps/rejected": -510.74151611328125, + "loss": 0.4983, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.4019895792007446, + "rewards/margins": 0.7769641876220703, + "rewards/rejected": -2.1789536476135254, + "step": 5610 + }, + { + "epoch": 0.73, + "eval_logits/chosen": -2.2821550369262695, + "eval_logits/rejected": -2.2920167446136475, + "eval_logps/chosen": -481.1645202636719, + "eval_logps/rejected": -491.68902587890625, + "eval_loss": 0.6012270450592041, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4841513633728027, + "eval_rewards/margins": 0.42163896560668945, + "eval_rewards/rejected": -1.9057903289794922, + "eval_runtime": 196.9735, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 5610 + }, + { + "epoch": 0.74, + "learning_rate": 9.921804638067292e-07, + "logits/chosen": -2.549757719039917, + "logits/rejected": -2.479682445526123, + "logps/chosen": -484.84307861328125, + "logps/rejected": -484.265380859375, + "loss": 0.5565, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.5029096603393555, + "rewards/margins": 0.5209773778915405, + "rewards/rejected": -2.0238871574401855, + "step": 5620 + }, + { + "epoch": 0.74, + "eval_logits/chosen": -2.2793734073638916, + "eval_logits/rejected": -2.288806200027466, + "eval_logps/chosen": -483.9366760253906, + "eval_logps/rejected": -494.7959899902344, + "eval_loss": 0.6013678908348083, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.5118728876113892, + "eval_rewards/margins": 0.4249865412712097, + "eval_rewards/rejected": -1.936859369277954, + "eval_runtime": 196.9929, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.076, + "step": 5620 + }, + { + "epoch": 0.74, + "learning_rate": 9.830852885165749e-07, + "logits/chosen": -2.3858892917633057, + "logits/rejected": -2.5052125453948975, + "logps/chosen": -443.6277770996094, + "logps/rejected": -522.7201538085938, + "loss": 0.6331, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.6711645126342773, + "rewards/margins": 0.36261284351348877, + "rewards/rejected": -2.0337772369384766, + "step": 5630 + }, + { + "epoch": 0.74, + "eval_logits/chosen": -2.274428606033325, + "eval_logits/rejected": -2.283668041229248, + "eval_logps/chosen": -487.4775695800781, + "eval_logps/rejected": -498.7555847167969, + "eval_loss": 0.6015153527259827, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.5472811460494995, + "eval_rewards/margins": 0.4291747510433197, + "eval_rewards/rejected": -1.9764559268951416, + "eval_runtime": 196.9769, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 5630 + }, + { + "epoch": 0.74, + "learning_rate": 9.740217788771453e-07, + "logits/chosen": -2.4526009559631348, + "logits/rejected": -2.5034918785095215, + "logps/chosen": -467.23858642578125, + "logps/rejected": -472.4024963378906, + "loss": 0.6273, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.4195793867111206, + "rewards/margins": 0.3300246298313141, + "rewards/rejected": -1.7496038675308228, + "step": 5640 + }, + { + "epoch": 0.74, + "eval_logits/chosen": -2.2742860317230225, + "eval_logits/rejected": -2.2834599018096924, + "eval_logps/chosen": -488.29083251953125, + "eval_logps/rejected": -499.7831726074219, + "eval_loss": 0.601729154586792, + "eval_rewards/accuracies": 0.6754999756813049, + "eval_rewards/chosen": -1.555414080619812, + "eval_rewards/margins": 0.4313174784183502, + "eval_rewards/rejected": -1.9867314100265503, + "eval_runtime": 196.9888, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.076, + "step": 5640 + }, + { + "epoch": 0.74, + "learning_rate": 9.649901240895374e-07, + "logits/chosen": -2.4312241077423096, + "logits/rejected": -2.428156852722168, + "logps/chosen": -451.2998962402344, + "logps/rejected": -492.22003173828125, + "loss": 0.5639, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.4851653575897217, + "rewards/margins": 0.5256800651550293, + "rewards/rejected": -2.010845184326172, + "step": 5650 + }, + { + "epoch": 0.74, + "eval_logits/chosen": -2.2733027935028076, + "eval_logits/rejected": -2.2820732593536377, + "eval_logps/chosen": -488.8881530761719, + "eval_logps/rejected": -500.4703063964844, + "eval_loss": 0.6017880439758301, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.561387538909912, + "eval_rewards/margins": 0.43221515417099, + "eval_rewards/rejected": -1.9936028718948364, + "eval_runtime": 197.1915, + "eval_samples_per_second": 10.142, + "eval_steps_per_second": 5.071, + "step": 5650 + }, + { + "epoch": 0.74, + "learning_rate": 9.559905126898803e-07, + "logits/chosen": -2.5057854652404785, + "logits/rejected": -2.45814847946167, + "logps/chosen": -486.2225646972656, + "logps/rejected": -483.7979431152344, + "loss": 0.5558, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.4760544300079346, + "rewards/margins": 0.47292762994766235, + "rewards/rejected": -1.9489818811416626, + "step": 5660 + }, + { + "epoch": 0.74, + "eval_logits/chosen": -2.2709062099456787, + "eval_logits/rejected": -2.2793853282928467, + "eval_logps/chosen": -489.8941345214844, + "eval_logps/rejected": -501.52117919921875, + "eval_loss": 0.6018960475921631, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.5714472532272339, + "eval_rewards/margins": 0.43266430497169495, + "eval_rewards/rejected": -2.0041117668151855, + "eval_runtime": 197.0072, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 5660 + }, + { + "epoch": 0.74, + "learning_rate": 9.470231325453958e-07, + "logits/chosen": -2.486539125442505, + "logits/rejected": -2.4017176628112793, + "logps/chosen": -487.76812744140625, + "logps/rejected": -487.40020751953125, + "loss": 0.6319, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.659186601638794, + "rewards/margins": 0.414996862411499, + "rewards/rejected": -2.074183225631714, + "step": 5670 + }, + { + "epoch": 0.74, + "eval_logits/chosen": -2.2720530033111572, + "eval_logits/rejected": -2.280398368835449, + "eval_logps/chosen": -489.9786376953125, + "eval_logps/rejected": -501.5250549316406, + "eval_loss": 0.6018633246421814, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.5722917318344116, + "eval_rewards/margins": 0.4318588972091675, + "eval_rewards/rejected": -2.004150629043579, + "eval_runtime": 196.9852, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 5670 + }, + { + "epoch": 0.74, + "learning_rate": 9.380881708504741e-07, + "logits/chosen": -2.430464029312134, + "logits/rejected": -2.3562910556793213, + "logps/chosen": -424.9378356933594, + "logps/rejected": -422.5223083496094, + "loss": 0.6027, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.4295918941497803, + "rewards/margins": 0.45209747552871704, + "rewards/rejected": -1.881689429283142, + "step": 5680 + }, + { + "epoch": 0.74, + "eval_logits/chosen": -2.2714383602142334, + "eval_logits/rejected": -2.279670000076294, + "eval_logps/chosen": -489.5482482910156, + "eval_logps/rejected": -501.127685546875, + "eval_loss": 0.6015214323997498, + "eval_rewards/accuracies": 0.671999990940094, + "eval_rewards/chosen": -1.5679885149002075, + "eval_rewards/margins": 0.4321881830692291, + "eval_rewards/rejected": -2.0001769065856934, + "eval_runtime": 197.0412, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 5680 + }, + { + "epoch": 0.74, + "learning_rate": 9.291858141227733e-07, + "logits/chosen": -2.5464229583740234, + "logits/rejected": -2.510371208190918, + "logps/chosen": -473.4132385253906, + "logps/rejected": -516.4024658203125, + "loss": 0.6094, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.5813710689544678, + "rewards/margins": 0.36329102516174316, + "rewards/rejected": -1.94466233253479, + "step": 5690 + }, + { + "epoch": 0.74, + "eval_logits/chosen": -2.272278308868408, + "eval_logits/rejected": -2.2808241844177246, + "eval_logps/chosen": -486.8964538574219, + "eval_logps/rejected": -498.2379455566406, + "eval_loss": 0.6014631390571594, + "eval_rewards/accuracies": 0.675000011920929, + "eval_rewards/chosen": -1.5414706468582153, + "eval_rewards/margins": 0.42980849742889404, + "eval_rewards/rejected": -1.9712789058685303, + "eval_runtime": 196.9031, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 5690 + }, + { + "epoch": 0.75, + "learning_rate": 9.203162481993175e-07, + "logits/chosen": -2.574666976928711, + "logits/rejected": -2.5605838298797607, + "logps/chosen": -517.884521484375, + "logps/rejected": -548.8802490234375, + "loss": 0.5418, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.382240891456604, + "rewards/margins": 0.5335317254066467, + "rewards/rejected": -1.9157726764678955, + "step": 5700 + }, + { + "epoch": 0.75, + "eval_logits/chosen": -2.270965337753296, + "eval_logits/rejected": -2.279421091079712, + "eval_logps/chosen": -486.2874450683594, + "eval_logps/rejected": -497.8123474121094, + "eval_loss": 0.6014032959938049, + "eval_rewards/accuracies": 0.675000011920929, + "eval_rewards/chosen": -1.5353801250457764, + "eval_rewards/margins": 0.43164312839508057, + "eval_rewards/rejected": -1.9670231342315674, + "eval_runtime": 196.9824, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 5700 + }, + { + "epoch": 0.75, + "learning_rate": 9.114796582326255e-07, + "logits/chosen": -2.587486505508423, + "logits/rejected": -2.52176570892334, + "logps/chosen": -476.721923828125, + "logps/rejected": -478.04241943359375, + "loss": 0.5983, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.6361109018325806, + "rewards/margins": 0.37555187940597534, + "rewards/rejected": -2.0116629600524902, + "step": 5710 + }, + { + "epoch": 0.75, + "eval_logits/chosen": -2.265850067138672, + "eval_logits/rejected": -2.2744035720825195, + "eval_logps/chosen": -486.9491271972656, + "eval_logps/rejected": -498.6160888671875, + "eval_loss": 0.601709246635437, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.5419965982437134, + "eval_rewards/margins": 0.4330638349056244, + "eval_rewards/rejected": -1.9750605821609497, + "eval_runtime": 196.9651, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 5710 + }, + { + "epoch": 0.75, + "learning_rate": 9.026762286868373e-07, + "logits/chosen": -2.5438895225524902, + "logits/rejected": -2.5906364917755127, + "logps/chosen": -475.0172424316406, + "logps/rejected": -548.627685546875, + "loss": 0.514, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.4461355209350586, + "rewards/margins": 0.6971672773361206, + "rewards/rejected": -2.1433026790618896, + "step": 5720 + }, + { + "epoch": 0.75, + "eval_logits/chosen": -2.2665517330169678, + "eval_logits/rejected": -2.27467942237854, + "eval_logps/chosen": -486.794677734375, + "eval_logps/rejected": -498.497802734375, + "eval_loss": 0.6015446782112122, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.5404525995254517, + "eval_rewards/margins": 0.43342551589012146, + "eval_rewards/rejected": -1.973878026008606, + "eval_runtime": 197.0977, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.074, + "step": 5720 + }, + { + "epoch": 0.75, + "learning_rate": 8.939061433338722e-07, + "logits/chosen": -2.5130527019500732, + "logits/rejected": -2.499204635620117, + "logps/chosen": -486.7627868652344, + "logps/rejected": -509.2347106933594, + "loss": 0.619, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.4873241186141968, + "rewards/margins": 0.36170998215675354, + "rewards/rejected": -1.849034070968628, + "step": 5730 + }, + { + "epoch": 0.75, + "eval_logits/chosen": -2.2673990726470947, + "eval_logits/rejected": -2.2757279872894287, + "eval_logps/chosen": -486.59014892578125, + "eval_logps/rejected": -498.31207275390625, + "eval_loss": 0.601487934589386, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.5384074449539185, + "eval_rewards/margins": 0.4336126148700714, + "eval_rewards/rejected": -1.9720200300216675, + "eval_runtime": 197.3813, + "eval_samples_per_second": 10.133, + "eval_steps_per_second": 5.066, + "step": 5730 + }, + { + "epoch": 0.75, + "learning_rate": 8.851695852495867e-07, + "logits/chosen": -2.487215280532837, + "logits/rejected": -2.556673526763916, + "logps/chosen": -415.80621337890625, + "logps/rejected": -482.0787048339844, + "loss": 0.5531, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.3482439517974854, + "rewards/margins": 0.6087583899497986, + "rewards/rejected": -1.9570024013519287, + "step": 5740 + }, + { + "epoch": 0.75, + "eval_logits/chosen": -2.2642552852630615, + "eval_logits/rejected": -2.2726001739501953, + "eval_logps/chosen": -488.0776672363281, + "eval_logps/rejected": -500.07562255859375, + "eval_loss": 0.6019229292869568, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.5532824993133545, + "eval_rewards/margins": 0.43637382984161377, + "eval_rewards/rejected": -1.9896563291549683, + "eval_runtime": 197.2187, + "eval_samples_per_second": 10.141, + "eval_steps_per_second": 5.071, + "step": 5740 + }, + { + "epoch": 0.75, + "learning_rate": 8.764667368099525e-07, + "logits/chosen": -2.383542060852051, + "logits/rejected": -2.3600852489471436, + "logps/chosen": -447.48663330078125, + "logps/rejected": -461.00311279296875, + "loss": 0.5954, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.495228886604309, + "rewards/margins": 0.45612436532974243, + "rewards/rejected": -1.9513533115386963, + "step": 5750 + }, + { + "epoch": 0.75, + "eval_logits/chosen": -2.2611377239227295, + "eval_logits/rejected": -2.2693042755126953, + "eval_logps/chosen": -491.0955810546875, + "eval_logps/rejected": -503.47662353515625, + "eval_loss": 0.6021108627319336, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.5834616422653198, + "eval_rewards/margins": 0.44020453095436096, + "eval_rewards/rejected": -2.0236663818359375, + "eval_runtime": 197.0868, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 5750 + }, + { + "epoch": 0.75, + "learning_rate": 8.677977796872541e-07, + "logits/chosen": -2.4153354167938232, + "logits/rejected": -2.3980398178100586, + "logps/chosen": -519.9898071289062, + "logps/rejected": -478.7305603027344, + "loss": 0.5817, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.646702527999878, + "rewards/margins": 0.4848001003265381, + "rewards/rejected": -2.131502628326416, + "step": 5760 + }, + { + "epoch": 0.75, + "eval_logits/chosen": -2.2558789253234863, + "eval_logits/rejected": -2.263498544692993, + "eval_logps/chosen": -495.2908020019531, + "eval_logps/rejected": -507.93896484375, + "eval_loss": 0.6024330854415894, + "eval_rewards/accuracies": 0.675000011920929, + "eval_rewards/chosen": -1.6254137754440308, + "eval_rewards/margins": 0.4428756833076477, + "eval_rewards/rejected": -2.068289279937744, + "eval_runtime": 197.0828, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 5760 + }, + { + "epoch": 0.76, + "learning_rate": 8.591628948462913e-07, + "logits/chosen": -2.3832852840423584, + "logits/rejected": -2.3352439403533936, + "logps/chosen": -496.6756896972656, + "logps/rejected": -539.018798828125, + "loss": 0.5812, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5903397798538208, + "rewards/margins": 0.47482776641845703, + "rewards/rejected": -2.0651674270629883, + "step": 5770 + }, + { + "epoch": 0.76, + "eval_logits/chosen": -2.252265691757202, + "eval_logits/rejected": -2.259838104248047, + "eval_logps/chosen": -496.9005432128906, + "eval_logps/rejected": -509.745361328125, + "eval_loss": 0.6024233102798462, + "eval_rewards/accuracies": 0.6765000224113464, + "eval_rewards/chosen": -1.6415109634399414, + "eval_rewards/margins": 0.4448423981666565, + "eval_rewards/rejected": -2.086353302001953, + "eval_runtime": 197.1673, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 5770 + }, + { + "epoch": 0.76, + "learning_rate": 8.505622625406054e-07, + "logits/chosen": -2.427070140838623, + "logits/rejected": -2.4216442108154297, + "logps/chosen": -469.9287109375, + "logps/rejected": -521.0614013671875, + "loss": 0.5655, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.5248607397079468, + "rewards/margins": 0.5117624402046204, + "rewards/rejected": -2.036623239517212, + "step": 5780 + }, + { + "epoch": 0.76, + "eval_logits/chosen": -2.2464487552642822, + "eval_logits/rejected": -2.253951072692871, + "eval_logps/chosen": -497.7292785644531, + "eval_logps/rejected": -510.7714538574219, + "eval_loss": 0.6029162406921387, + "eval_rewards/accuracies": 0.6765000224113464, + "eval_rewards/chosen": -1.64979887008667, + "eval_rewards/margins": 0.4468156099319458, + "eval_rewards/rejected": -2.0966145992279053, + "eval_runtime": 197.0263, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 5780 + }, + { + "epoch": 0.76, + "learning_rate": 8.419960623087129e-07, + "logits/chosen": -2.320359468460083, + "logits/rejected": -2.3157875537872314, + "logps/chosen": -408.5916442871094, + "logps/rejected": -476.99945068359375, + "loss": 0.6, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.4397971630096436, + "rewards/margins": 0.4516604542732239, + "rewards/rejected": -1.8914577960968018, + "step": 5790 + }, + { + "epoch": 0.76, + "eval_logits/chosen": -2.2434499263763428, + "eval_logits/rejected": -2.251415252685547, + "eval_logps/chosen": -495.6883850097656, + "eval_logps/rejected": -508.6549377441406, + "eval_loss": 0.6027740240097046, + "eval_rewards/accuracies": 0.671500027179718, + "eval_rewards/chosen": -1.6293898820877075, + "eval_rewards/margins": 0.44605928659439087, + "eval_rewards/rejected": -2.075449228286743, + "eval_runtime": 197.5339, + "eval_samples_per_second": 10.125, + "eval_steps_per_second": 5.062, + "step": 5790 + }, + { + "epoch": 0.76, + "learning_rate": 8.334644729703617e-07, + "logits/chosen": -2.474212408065796, + "logits/rejected": -2.4731945991516113, + "logps/chosen": -463.1309509277344, + "logps/rejected": -494.95343017578125, + "loss": 0.6685, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.7183113098144531, + "rewards/margins": 0.34951871633529663, + "rewards/rejected": -2.0678298473358154, + "step": 5800 + }, + { + "epoch": 0.76, + "eval_logits/chosen": -2.242737054824829, + "eval_logits/rejected": -2.251211404800415, + "eval_logps/chosen": -494.2292785644531, + "eval_logps/rejected": -507.03466796875, + "eval_loss": 0.602836549282074, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.6147984266281128, + "eval_rewards/margins": 0.44444799423217773, + "eval_rewards/rejected": -2.05924654006958, + "eval_runtime": 196.8054, + "eval_samples_per_second": 10.162, + "eval_steps_per_second": 5.081, + "step": 5800 + }, + { + "epoch": 0.76, + "learning_rate": 8.249676726227931e-07, + "logits/chosen": -2.3594369888305664, + "logits/rejected": -2.4024455547332764, + "logps/chosen": -534.9568481445312, + "logps/rejected": -516.611328125, + "loss": 0.6623, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.6957733631134033, + "rewards/margins": 0.27873340249061584, + "rewards/rejected": -1.9745069742202759, + "step": 5810 + }, + { + "epoch": 0.76, + "eval_logits/chosen": -2.245490550994873, + "eval_logits/rejected": -2.2543113231658936, + "eval_logps/chosen": -490.97747802734375, + "eval_logps/rejected": -503.5045166015625, + "eval_loss": 0.6023638844490051, + "eval_rewards/accuracies": 0.671500027179718, + "eval_rewards/chosen": -1.5822806358337402, + "eval_rewards/margins": 0.44166430830955505, + "eval_rewards/rejected": -2.023944854736328, + "eval_runtime": 197.2133, + "eval_samples_per_second": 10.141, + "eval_steps_per_second": 5.071, + "step": 5810 + }, + { + "epoch": 0.76, + "learning_rate": 8.165058386370314e-07, + "logits/chosen": -2.4096219539642334, + "logits/rejected": -2.40710711479187, + "logps/chosen": -485.88201904296875, + "logps/rejected": -538.20654296875, + "loss": 0.6156, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.5708699226379395, + "rewards/margins": 0.40414518117904663, + "rewards/rejected": -1.9750150442123413, + "step": 5820 + }, + { + "epoch": 0.76, + "eval_logits/chosen": -2.2488386631011963, + "eval_logits/rejected": -2.2581334114074707, + "eval_logps/chosen": -488.6615295410156, + "eval_logps/rejected": -500.8721618652344, + "eval_loss": 0.6023542881011963, + "eval_rewards/accuracies": 0.671500027179718, + "eval_rewards/chosen": -1.5591212511062622, + "eval_rewards/margins": 0.4384998679161072, + "eval_rewards/rejected": -1.997620940208435, + "eval_runtime": 197.0619, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 5820 + }, + { + "epoch": 0.76, + "learning_rate": 8.080791476541721e-07, + "logits/chosen": -2.366792678833008, + "logits/rejected": -2.3858425617218018, + "logps/chosen": -435.0596618652344, + "logps/rejected": -487.25054931640625, + "loss": 0.5679, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.5387595891952515, + "rewards/margins": 0.6316569447517395, + "rewards/rejected": -2.1704165935516357, + "step": 5830 + }, + { + "epoch": 0.76, + "eval_logits/chosen": -2.2466695308685303, + "eval_logits/rejected": -2.2560107707977295, + "eval_logps/chosen": -489.2959289550781, + "eval_logps/rejected": -501.5491943359375, + "eval_loss": 0.6022310256958008, + "eval_rewards/accuracies": 0.6704999804496765, + "eval_rewards/chosen": -1.5654653310775757, + "eval_rewards/margins": 0.4389267563819885, + "eval_rewards/rejected": -2.00439190864563, + "eval_runtime": 197.0548, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 5830 + }, + { + "epoch": 0.76, + "learning_rate": 7.996877755817026e-07, + "logits/chosen": -2.478151321411133, + "logits/rejected": -2.4247565269470215, + "logps/chosen": -471.8946228027344, + "logps/rejected": -460.6763610839844, + "loss": 0.6497, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5455646514892578, + "rewards/margins": 0.3121718466281891, + "rewards/rejected": -1.857736587524414, + "step": 5840 + }, + { + "epoch": 0.76, + "eval_logits/chosen": -2.2453322410583496, + "eval_logits/rejected": -2.2547030448913574, + "eval_logps/chosen": -488.5802307128906, + "eval_logps/rejected": -500.7490539550781, + "eval_loss": 0.6024636030197144, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.5583082437515259, + "eval_rewards/margins": 0.4380822479724884, + "eval_rewards/rejected": -1.9963903427124023, + "eval_runtime": 197.1694, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 5840 + }, + { + "epoch": 0.77, + "learning_rate": 7.913318975898238e-07, + "logits/chosen": -2.5146617889404297, + "logits/rejected": -2.4494576454162598, + "logps/chosen": -574.7240600585938, + "logps/rejected": -543.0043334960938, + "loss": 0.6452, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.6685956716537476, + "rewards/margins": 0.39929109811782837, + "rewards/rejected": -2.0678868293762207, + "step": 5850 + }, + { + "epoch": 0.77, + "eval_logits/chosen": -2.2524771690368652, + "eval_logits/rejected": -2.2622721195220947, + "eval_logps/chosen": -485.30487060546875, + "eval_logps/rejected": -497.1414489746094, + "eval_loss": 0.6020148992538452, + "eval_rewards/accuracies": 0.6704999804496765, + "eval_rewards/chosen": -1.525554895401001, + "eval_rewards/margins": 0.43475958704948425, + "eval_rewards/rejected": -1.960314154624939, + "eval_runtime": 197.0139, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 5850 + }, + { + "epoch": 0.77, + "learning_rate": 7.830116881077992e-07, + "logits/chosen": -2.4145424365997314, + "logits/rejected": -2.4374794960021973, + "logps/chosen": -492.340576171875, + "logps/rejected": -513.1353759765625, + "loss": 0.546, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.4143502712249756, + "rewards/margins": 0.5963946580886841, + "rewards/rejected": -2.010745048522949, + "step": 5860 + }, + { + "epoch": 0.77, + "eval_logits/chosen": -2.2555458545684814, + "eval_logits/rejected": -2.265408754348755, + "eval_logps/chosen": -485.0486145019531, + "eval_logps/rejected": -496.7984313964844, + "eval_loss": 0.601836085319519, + "eval_rewards/accuracies": 0.6700000166893005, + "eval_rewards/chosen": -1.5229917764663696, + "eval_rewards/margins": 0.43389254808425903, + "eval_rewards/rejected": -1.9568843841552734, + "eval_runtime": 197.3008, + "eval_samples_per_second": 10.137, + "eval_steps_per_second": 5.068, + "step": 5860 + }, + { + "epoch": 0.77, + "learning_rate": 7.747273208203096e-07, + "logits/chosen": -2.4561104774475098, + "logits/rejected": -2.4396491050720215, + "logps/chosen": -484.59979248046875, + "logps/rejected": -533.8568115234375, + "loss": 0.6034, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.5615015029907227, + "rewards/margins": 0.4400373101234436, + "rewards/rejected": -2.0015387535095215, + "step": 5870 + }, + { + "epoch": 0.77, + "eval_logits/chosen": -2.259807586669922, + "eval_logits/rejected": -2.2697737216949463, + "eval_logps/chosen": -483.45758056640625, + "eval_logps/rejected": -494.9549560546875, + "eval_loss": 0.601536750793457, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.5070816278457642, + "eval_rewards/margins": 0.4313679337501526, + "eval_rewards/rejected": -1.938449501991272, + "eval_runtime": 196.9847, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 5870 + }, + { + "epoch": 0.77, + "learning_rate": 7.664789686638272e-07, + "logits/chosen": -2.4302382469177246, + "logits/rejected": -2.3414528369903564, + "logps/chosen": -445.996337890625, + "logps/rejected": -505.2832946777344, + "loss": 0.5887, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.4469554424285889, + "rewards/margins": 0.5224305987358093, + "rewards/rejected": -1.969386339187622, + "step": 5880 + }, + { + "epoch": 0.77, + "eval_logits/chosen": -2.263823986053467, + "eval_logits/rejected": -2.273604154586792, + "eval_logps/chosen": -482.2774963378906, + "eval_logps/rejected": -493.6579284667969, + "eval_loss": 0.6012995839118958, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4952807426452637, + "eval_rewards/margins": 0.4301982820034027, + "eval_rewards/rejected": -1.9254790544509888, + "eval_runtime": 196.9121, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.078, + "step": 5880 + }, + { + "epoch": 0.77, + "learning_rate": 7.582668038230089e-07, + "logits/chosen": -2.566232204437256, + "logits/rejected": -2.5483384132385254, + "logps/chosen": -482.5179138183594, + "logps/rejected": -509.20068359375, + "loss": 0.5731, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.3963481187820435, + "rewards/margins": 0.5344547033309937, + "rewards/rejected": -1.9308027029037476, + "step": 5890 + }, + { + "epoch": 0.77, + "eval_logits/chosen": -2.2662353515625, + "eval_logits/rejected": -2.2763512134552, + "eval_logps/chosen": -481.1260681152344, + "eval_logps/rejected": -492.50885009765625, + "eval_loss": 0.6008906364440918, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4837665557861328, + "eval_rewards/margins": 0.4302213191986084, + "eval_rewards/rejected": -1.9139878749847412, + "eval_runtime": 196.9832, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 5890 + }, + { + "epoch": 0.77, + "learning_rate": 7.500909977271007e-07, + "logits/chosen": -2.534989356994629, + "logits/rejected": -2.5355916023254395, + "logps/chosen": -502.8077087402344, + "logps/rejected": -513.6491088867188, + "loss": 0.6046, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.5153987407684326, + "rewards/margins": 0.42296546697616577, + "rewards/rejected": -1.938364028930664, + "step": 5900 + }, + { + "epoch": 0.77, + "eval_logits/chosen": -2.2680561542510986, + "eval_logits/rejected": -2.2782294750213623, + "eval_logps/chosen": -479.7952880859375, + "eval_logps/rejected": -491.11492919921875, + "eval_loss": 0.6009992957115173, + "eval_rewards/accuracies": 0.675000011920929, + "eval_rewards/chosen": -1.470458745956421, + "eval_rewards/margins": 0.4295899569988251, + "eval_rewards/rejected": -1.900048851966858, + "eval_runtime": 197.14, + "eval_samples_per_second": 10.145, + "eval_steps_per_second": 5.073, + "step": 5900 + }, + { + "epoch": 0.77, + "learning_rate": 7.41951721046357e-07, + "logits/chosen": -2.4341280460357666, + "logits/rejected": -2.367621421813965, + "logps/chosen": -463.6956481933594, + "logps/rejected": -497.50518798828125, + "loss": 0.5727, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.3780359029769897, + "rewards/margins": 0.5068241357803345, + "rewards/rejected": -1.8848600387573242, + "step": 5910 + }, + { + "epoch": 0.77, + "eval_logits/chosen": -2.267744541168213, + "eval_logits/rejected": -2.2779266834259033, + "eval_logps/chosen": -478.6908264160156, + "eval_logps/rejected": -489.8962097167969, + "eval_loss": 0.6008686423301697, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4594143629074097, + "eval_rewards/margins": 0.4284478425979614, + "eval_rewards/rejected": -1.887862205505371, + "eval_runtime": 196.874, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.079, + "step": 5910 + }, + { + "epoch": 0.77, + "learning_rate": 7.338491436884787e-07, + "logits/chosen": -2.3899145126342773, + "logits/rejected": -2.415982723236084, + "logps/chosen": -430.9964904785156, + "logps/rejected": -475.78314208984375, + "loss": 0.5793, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.4056332111358643, + "rewards/margins": 0.44162511825561523, + "rewards/rejected": -1.8472583293914795, + "step": 5920 + }, + { + "epoch": 0.77, + "eval_logits/chosen": -2.2659740447998047, + "eval_logits/rejected": -2.2761447429656982, + "eval_logps/chosen": -477.880615234375, + "eval_logps/rejected": -489.0225524902344, + "eval_loss": 0.6009781360626221, + "eval_rewards/accuracies": 0.6759999990463257, + "eval_rewards/chosen": -1.4513121843338013, + "eval_rewards/margins": 0.4278135299682617, + "eval_rewards/rejected": -1.879125714302063, + "eval_runtime": 196.8377, + "eval_samples_per_second": 10.161, + "eval_steps_per_second": 5.08, + "step": 5920 + }, + { + "epoch": 0.78, + "learning_rate": 7.257834347950693e-07, + "logits/chosen": -2.445920467376709, + "logits/rejected": -2.4083142280578613, + "logps/chosen": -465.4082946777344, + "logps/rejected": -448.84210205078125, + "loss": 0.6688, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.5156171321868896, + "rewards/margins": 0.25803884863853455, + "rewards/rejected": -1.7736561298370361, + "step": 5930 + }, + { + "epoch": 0.78, + "eval_logits/chosen": -2.2672274112701416, + "eval_logits/rejected": -2.2775511741638184, + "eval_logps/chosen": -476.42205810546875, + "eval_logps/rejected": -487.39031982421875, + "eval_loss": 0.6011342406272888, + "eval_rewards/accuracies": 0.6759999990463257, + "eval_rewards/chosen": -1.4367263317108154, + "eval_rewards/margins": 0.4260764718055725, + "eval_rewards/rejected": -1.862802505493164, + "eval_runtime": 197.2252, + "eval_samples_per_second": 10.141, + "eval_steps_per_second": 5.07, + "step": 5930 + }, + { + "epoch": 0.78, + "learning_rate": 7.177547627380987e-07, + "logits/chosen": -2.4808781147003174, + "logits/rejected": -2.4829397201538086, + "logps/chosen": -504.11962890625, + "logps/rejected": -516.2730712890625, + "loss": 0.5613, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.3955371379852295, + "rewards/margins": 0.46548405289649963, + "rewards/rejected": -1.8610212802886963, + "step": 5940 + }, + { + "epoch": 0.78, + "eval_logits/chosen": -2.265183687210083, + "eval_logits/rejected": -2.2755210399627686, + "eval_logps/chosen": -476.0000305175781, + "eval_logps/rejected": -486.95806884765625, + "eval_loss": 0.6013757586479187, + "eval_rewards/accuracies": 0.6759999990463257, + "eval_rewards/chosen": -1.4325059652328491, + "eval_rewards/margins": 0.42597436904907227, + "eval_rewards/rejected": -1.858480453491211, + "eval_runtime": 197.0249, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.076, + "step": 5940 + }, + { + "epoch": 0.78, + "learning_rate": 7.097632951163949e-07, + "logits/chosen": -2.447105884552002, + "logits/rejected": -2.4564273357391357, + "logps/chosen": -489.5555114746094, + "logps/rejected": -485.93609619140625, + "loss": 0.6437, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.3995712995529175, + "rewards/margins": 0.33234044909477234, + "rewards/rejected": -1.7319118976593018, + "step": 5950 + }, + { + "epoch": 0.78, + "eval_logits/chosen": -2.2648227214813232, + "eval_logits/rejected": -2.2753043174743652, + "eval_logps/chosen": -475.759033203125, + "eval_logps/rejected": -486.78265380859375, + "eval_loss": 0.601370632648468, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.4300963878631592, + "eval_rewards/margins": 0.4266298711299896, + "eval_rewards/rejected": -1.8567264080047607, + "eval_runtime": 197.0184, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.076, + "step": 5950 + }, + { + "epoch": 0.78, + "learning_rate": 7.018091987521386e-07, + "logits/chosen": -2.5762312412261963, + "logits/rejected": -2.48101806640625, + "logps/chosen": -496.75152587890625, + "logps/rejected": -503.8118591308594, + "loss": 0.6239, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.5386106967926025, + "rewards/margins": 0.41985002160072327, + "rewards/rejected": -1.9584605693817139, + "step": 5960 + }, + { + "epoch": 0.78, + "eval_logits/chosen": -2.265352249145508, + "eval_logits/rejected": -2.275949239730835, + "eval_logps/chosen": -475.8328857421875, + "eval_logps/rejected": -486.9198303222656, + "eval_loss": 0.6012548804283142, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.4308347702026367, + "eval_rewards/margins": 0.4272632896900177, + "eval_rewards/rejected": -1.8580981492996216, + "eval_runtime": 197.3352, + "eval_samples_per_second": 10.135, + "eval_steps_per_second": 5.068, + "step": 5960 + }, + { + "epoch": 0.78, + "learning_rate": 6.93892639687386e-07, + "logits/chosen": -2.5643134117126465, + "logits/rejected": -2.5059189796447754, + "logps/chosen": -499.9007263183594, + "logps/rejected": -481.8160095214844, + "loss": 0.5621, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.3037292957305908, + "rewards/margins": 0.500011146068573, + "rewards/rejected": -1.8037407398223877, + "step": 5970 + }, + { + "epoch": 0.78, + "eval_logits/chosen": -2.2671244144439697, + "eval_logits/rejected": -2.277761697769165, + "eval_logps/chosen": -475.1019287109375, + "eval_logps/rejected": -486.0708312988281, + "eval_loss": 0.6014404892921448, + "eval_rewards/accuracies": 0.6759999990463257, + "eval_rewards/chosen": -1.4235249757766724, + "eval_rewards/margins": 0.4260830581188202, + "eval_rewards/rejected": -1.8496081829071045, + "eval_runtime": 196.9419, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.078, + "step": 5970 + }, + { + "epoch": 0.78, + "learning_rate": 6.860137831806018e-07, + "logits/chosen": -2.452705144882202, + "logits/rejected": -2.4688546657562256, + "logps/chosen": -502.90594482421875, + "logps/rejected": -490.27099609375, + "loss": 0.6296, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.4148881435394287, + "rewards/margins": 0.36012446880340576, + "rewards/rejected": -1.7750126123428345, + "step": 5980 + }, + { + "epoch": 0.78, + "eval_logits/chosen": -2.265007734298706, + "eval_logits/rejected": -2.275844097137451, + "eval_logps/chosen": -475.1183776855469, + "eval_logps/rejected": -486.05059814453125, + "eval_loss": 0.6015436053276062, + "eval_rewards/accuracies": 0.6765000224113464, + "eval_rewards/chosen": -1.4236900806427002, + "eval_rewards/margins": 0.4257160723209381, + "eval_rewards/rejected": -1.849406123161316, + "eval_runtime": 196.9075, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 5980 + }, + { + "epoch": 0.78, + "learning_rate": 6.781727937032054e-07, + "logits/chosen": -2.403275966644287, + "logits/rejected": -2.36027193069458, + "logps/chosen": -439.105712890625, + "logps/rejected": -502.03125, + "loss": 0.4725, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.2160950899124146, + "rewards/margins": 0.7384004592895508, + "rewards/rejected": -1.9544956684112549, + "step": 5990 + }, + { + "epoch": 0.78, + "eval_logits/chosen": -2.2625324726104736, + "eval_logits/rejected": -2.27329158782959, + "eval_logps/chosen": -476.120361328125, + "eval_logps/rejected": -487.20330810546875, + "eval_loss": 0.6016895174980164, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.433709979057312, + "eval_rewards/margins": 0.4272230565547943, + "eval_rewards/rejected": -1.8609328269958496, + "eval_runtime": 197.0728, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.074, + "step": 5990 + }, + { + "epoch": 0.79, + "learning_rate": 6.703698349361437e-07, + "logits/chosen": -2.4751968383789062, + "logits/rejected": -2.4393486976623535, + "logps/chosen": -460.97589111328125, + "logps/rejected": -453.7529296875, + "loss": 0.6035, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.4815524816513062, + "rewards/margins": 0.4474663734436035, + "rewards/rejected": -1.9290189743041992, + "step": 6000 + }, + { + "epoch": 0.79, + "eval_logits/chosen": -2.2604939937591553, + "eval_logits/rejected": -2.2711093425750732, + "eval_logps/chosen": -476.5792236328125, + "eval_logps/rejected": -487.7991943359375, + "eval_loss": 0.6018210649490356, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.4382983446121216, + "eval_rewards/margins": 0.4285930097103119, + "eval_rewards/rejected": -1.8668912649154663, + "eval_runtime": 197.257, + "eval_samples_per_second": 10.139, + "eval_steps_per_second": 5.07, + "step": 6000 + }, + { + "epoch": 0.79, + "learning_rate": 6.626050697664682e-07, + "logits/chosen": -2.4417779445648193, + "logits/rejected": -2.4099671840667725, + "logps/chosen": -476.68585205078125, + "logps/rejected": -481.16741943359375, + "loss": 0.5114, + "rewards/accuracies": 0.8125, + "rewards/chosen": -1.3816766738891602, + "rewards/margins": 0.5975061058998108, + "rewards/rejected": -1.9791828393936157, + "step": 6010 + }, + { + "epoch": 0.79, + "eval_logits/chosen": -2.2580788135528564, + "eval_logits/rejected": -2.2684972286224365, + "eval_logps/chosen": -476.69720458984375, + "eval_logps/rejected": -487.9945983886719, + "eval_loss": 0.6021937727928162, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4394779205322266, + "eval_rewards/margins": 0.42936745285987854, + "eval_rewards/rejected": -1.8688453435897827, + "eval_runtime": 197.1161, + "eval_samples_per_second": 10.146, + "eval_steps_per_second": 5.073, + "step": 6010 + }, + { + "epoch": 0.79, + "learning_rate": 6.548786602839404e-07, + "logits/chosen": -2.4599475860595703, + "logits/rejected": -2.4786622524261475, + "logps/chosen": -427.3006896972656, + "logps/rejected": -455.1160583496094, + "loss": 0.5019, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.32621169090271, + "rewards/margins": 0.6678387522697449, + "rewards/rejected": -1.9940506219863892, + "step": 6020 + }, + { + "epoch": 0.79, + "eval_logits/chosen": -2.2531657218933105, + "eval_logits/rejected": -2.2634389400482178, + "eval_logps/chosen": -478.9855041503906, + "eval_logps/rejected": -490.62701416015625, + "eval_loss": 0.6022667288780212, + "eval_rewards/accuracies": 0.6759999990463257, + "eval_rewards/chosen": -1.4623608589172363, + "eval_rewards/margins": 0.43280887603759766, + "eval_rewards/rejected": -1.8951694965362549, + "eval_runtime": 197.0009, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 6020 + }, + { + "epoch": 0.79, + "learning_rate": 6.471907677776426e-07, + "logits/chosen": -2.572305202484131, + "logits/rejected": -2.5147862434387207, + "logps/chosen": -504.14837646484375, + "logps/rejected": -492.93988037109375, + "loss": 0.6167, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.4243654012680054, + "rewards/margins": 0.43302780389785767, + "rewards/rejected": -1.8573930263519287, + "step": 6030 + }, + { + "epoch": 0.79, + "eval_logits/chosen": -2.2490084171295166, + "eval_logits/rejected": -2.2592198848724365, + "eval_logps/chosen": -479.2915344238281, + "eval_logps/rejected": -490.9763488769531, + "eval_loss": 0.602845311164856, + "eval_rewards/accuracies": 0.675000011920929, + "eval_rewards/chosen": -1.4654208421707153, + "eval_rewards/margins": 0.4332420825958252, + "eval_rewards/rejected": -1.89866304397583, + "eval_runtime": 197.2697, + "eval_samples_per_second": 10.138, + "eval_steps_per_second": 5.069, + "step": 6030 + }, + { + "epoch": 0.79, + "learning_rate": 6.39541552732617e-07, + "logits/chosen": -2.483621120452881, + "logits/rejected": -2.469176769256592, + "logps/chosen": -477.4981994628906, + "logps/rejected": -550.258056640625, + "loss": 0.6122, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.5244404077529907, + "rewards/margins": 0.39870789647102356, + "rewards/rejected": -1.9231481552124023, + "step": 6040 + }, + { + "epoch": 0.79, + "eval_logits/chosen": -2.245048999786377, + "eval_logits/rejected": -2.2553117275238037, + "eval_logps/chosen": -479.2562561035156, + "eval_logps/rejected": -490.96612548828125, + "eval_loss": 0.6031754016876221, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.4650685787200928, + "eval_rewards/margins": 0.4334927797317505, + "eval_rewards/rejected": -1.8985613584518433, + "eval_runtime": 197.0832, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 6040 + }, + { + "epoch": 0.79, + "learning_rate": 6.319311748265086e-07, + "logits/chosen": -2.396491050720215, + "logits/rejected": -2.3860714435577393, + "logps/chosen": -578.4803466796875, + "logps/rejected": -558.1339721679688, + "loss": 0.5706, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.421531319618225, + "rewards/margins": 0.5684695243835449, + "rewards/rejected": -1.9900007247924805, + "step": 6050 + }, + { + "epoch": 0.79, + "eval_logits/chosen": -2.244931221008301, + "eval_logits/rejected": -2.2554006576538086, + "eval_logps/chosen": -477.5636291503906, + "eval_logps/rejected": -489.1151428222656, + "eval_loss": 0.6030679941177368, + "eval_rewards/accuracies": 0.671999990940094, + "eval_rewards/chosen": -1.4481416940689087, + "eval_rewards/margins": 0.4319096505641937, + "eval_rewards/rejected": -1.8800513744354248, + "eval_runtime": 196.8509, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 6050 + }, + { + "epoch": 0.79, + "learning_rate": 6.243597929262404e-07, + "logits/chosen": -2.4419026374816895, + "logits/rejected": -2.36991810798645, + "logps/chosen": -425.7701721191406, + "logps/rejected": -522.5084228515625, + "loss": 0.5889, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.6083042621612549, + "rewards/margins": 0.5457934737205505, + "rewards/rejected": -2.15409779548645, + "step": 6060 + }, + { + "epoch": 0.79, + "eval_logits/chosen": -2.243594169616699, + "eval_logits/rejected": -2.2539806365966797, + "eval_logps/chosen": -478.1839294433594, + "eval_logps/rejected": -489.84661865234375, + "eval_loss": 0.6033233404159546, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.4543453454971313, + "eval_rewards/margins": 0.4330209493637085, + "eval_rewards/rejected": -1.8873660564422607, + "eval_runtime": 197.1008, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.074, + "step": 6060 + }, + { + "epoch": 0.79, + "learning_rate": 6.168275650846875e-07, + "logits/chosen": -2.5039191246032715, + "logits/rejected": -2.503308057785034, + "logps/chosen": -501.80194091796875, + "logps/rejected": -490.83343505859375, + "loss": 0.5764, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.3347852230072021, + "rewards/margins": 0.53114253282547, + "rewards/rejected": -1.8659274578094482, + "step": 6070 + }, + { + "epoch": 0.79, + "eval_logits/chosen": -2.242035388946533, + "eval_logits/rejected": -2.252312421798706, + "eval_logps/chosen": -478.4716491699219, + "eval_logps/rejected": -490.1448059082031, + "eval_loss": 0.6033748984336853, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4572224617004395, + "eval_rewards/margins": 0.4331255555152893, + "eval_rewards/rejected": -1.890347957611084, + "eval_runtime": 196.9973, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 6070 + }, + { + "epoch": 0.8, + "learning_rate": 6.093346485373863e-07, + "logits/chosen": -2.3956141471862793, + "logits/rejected": -2.3247618675231934, + "logps/chosen": -507.55584716796875, + "logps/rejected": -502.39202880859375, + "loss": 0.5793, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.5132863521575928, + "rewards/margins": 0.4537445902824402, + "rewards/rejected": -1.9670308828353882, + "step": 6080 + }, + { + "epoch": 0.8, + "eval_logits/chosen": -2.237912178039551, + "eval_logits/rejected": -2.248093605041504, + "eval_logps/chosen": -479.7290344238281, + "eval_logps/rejected": -491.5454406738281, + "eval_loss": 0.6036680936813354, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4697966575622559, + "eval_rewards/margins": 0.4345575273036957, + "eval_rewards/rejected": -1.9043540954589844, + "eval_runtime": 197.1075, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.073, + "step": 6080 + }, + { + "epoch": 0.8, + "learning_rate": 6.018811996992455e-07, + "logits/chosen": -2.3724429607391357, + "logits/rejected": -2.404536724090576, + "logps/chosen": -489.76641845703125, + "logps/rejected": -499.05224609375, + "loss": 0.4869, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.3946596384048462, + "rewards/margins": 0.7288642525672913, + "rewards/rejected": -2.123523712158203, + "step": 6090 + }, + { + "epoch": 0.8, + "eval_logits/chosen": -2.2322680950164795, + "eval_logits/rejected": -2.242032527923584, + "eval_logps/chosen": -481.9536437988281, + "eval_logps/rejected": -494.1253967285156, + "eval_loss": 0.6040297150611877, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.4920426607131958, + "eval_rewards/margins": 0.43811145424842834, + "eval_rewards/rejected": -1.9301540851593018, + "eval_runtime": 197.2026, + "eval_samples_per_second": 10.142, + "eval_steps_per_second": 5.071, + "step": 6090 + }, + { + "epoch": 0.8, + "learning_rate": 5.944673741612866e-07, + "logits/chosen": -2.391608953475952, + "logits/rejected": -2.3916258811950684, + "logps/chosen": -503.87261962890625, + "logps/rejected": -543.1837768554688, + "loss": 0.6102, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.6538951396942139, + "rewards/margins": 0.37116914987564087, + "rewards/rejected": -2.02506422996521, + "step": 6100 + }, + { + "epoch": 0.8, + "eval_logits/chosen": -2.2282028198242188, + "eval_logits/rejected": -2.2379844188690186, + "eval_logps/chosen": -483.1098327636719, + "eval_logps/rejected": -495.4471740722656, + "eval_loss": 0.6045427322387695, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.5036044120788574, + "eval_rewards/margins": 0.439767062664032, + "eval_rewards/rejected": -1.943371295928955, + "eval_runtime": 197.072, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.074, + "step": 6100 + }, + { + "epoch": 0.8, + "learning_rate": 5.870933266873916e-07, + "logits/chosen": -2.463844060897827, + "logits/rejected": -2.460224151611328, + "logps/chosen": -424.7655334472656, + "logps/rejected": -478.42974853515625, + "loss": 0.6125, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.4444048404693604, + "rewards/margins": 0.4888002872467041, + "rewards/rejected": -1.9332048892974854, + "step": 6110 + }, + { + "epoch": 0.8, + "eval_logits/chosen": -2.2313151359558105, + "eval_logits/rejected": -2.241241693496704, + "eval_logps/chosen": -481.9096984863281, + "eval_logps/rejected": -494.27734375, + "eval_loss": 0.6041462421417236, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.4916030168533325, + "eval_rewards/margins": 0.4400705397129059, + "eval_rewards/rejected": -1.9316734075546265, + "eval_runtime": 196.949, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 6110 + }, + { + "epoch": 0.8, + "learning_rate": 5.797592112110734e-07, + "logits/chosen": -2.377103567123413, + "logits/rejected": -2.3861546516418457, + "logps/chosen": -396.3837890625, + "logps/rejected": -416.21844482421875, + "loss": 0.6175, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.3936705589294434, + "rewards/margins": 0.45263057947158813, + "rewards/rejected": -1.8463008403778076, + "step": 6120 + }, + { + "epoch": 0.8, + "eval_logits/chosen": -2.2341790199279785, + "eval_logits/rejected": -2.244394063949585, + "eval_logps/chosen": -479.7630920410156, + "eval_logps/rejected": -491.9109802246094, + "eval_loss": 0.6041192412376404, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.470137119293213, + "eval_rewards/margins": 0.4378722608089447, + "eval_rewards/rejected": -1.9080092906951904, + "eval_runtime": 196.8201, + "eval_samples_per_second": 10.162, + "eval_steps_per_second": 5.081, + "step": 6120 + }, + { + "epoch": 0.8, + "learning_rate": 5.724651808322645e-07, + "logits/chosen": -2.410794973373413, + "logits/rejected": -2.4280776977539062, + "logps/chosen": -440.4469299316406, + "logps/rejected": -522.4815673828125, + "loss": 0.5436, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.3273398876190186, + "rewards/margins": 0.6034899950027466, + "rewards/rejected": -1.9308300018310547, + "step": 6130 + }, + { + "epoch": 0.8, + "eval_logits/chosen": -2.2336935997009277, + "eval_logits/rejected": -2.2439229488372803, + "eval_logps/chosen": -479.22161865234375, + "eval_logps/rejected": -491.26458740234375, + "eval_loss": 0.6043089032173157, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4647226333618164, + "eval_rewards/margins": 0.4368227422237396, + "eval_rewards/rejected": -1.9015452861785889, + "eval_runtime": 196.811, + "eval_samples_per_second": 10.162, + "eval_steps_per_second": 5.081, + "step": 6130 + }, + { + "epoch": 0.8, + "learning_rate": 5.652113878141194e-07, + "logits/chosen": -2.323244333267212, + "logits/rejected": -2.281261444091797, + "logps/chosen": -386.3601989746094, + "logps/rejected": -416.959716796875, + "loss": 0.6058, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.3550357818603516, + "rewards/margins": 0.3657050132751465, + "rewards/rejected": -1.7207406759262085, + "step": 6140 + }, + { + "epoch": 0.8, + "eval_logits/chosen": -2.230740785598755, + "eval_logits/rejected": -2.240811586380005, + "eval_logps/chosen": -479.8324890136719, + "eval_logps/rejected": -492.0013122558594, + "eval_loss": 0.6044318079948425, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.4708307981491089, + "eval_rewards/margins": 0.43808186054229736, + "eval_rewards/rejected": -1.9089127779006958, + "eval_runtime": 196.8299, + "eval_samples_per_second": 10.161, + "eval_steps_per_second": 5.081, + "step": 6140 + }, + { + "epoch": 0.8, + "learning_rate": 5.579979835798361e-07, + "logits/chosen": -2.4510176181793213, + "logits/rejected": -2.375293731689453, + "logps/chosen": -444.62164306640625, + "logps/rejected": -498.118408203125, + "loss": 0.5545, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.3945045471191406, + "rewards/margins": 0.5962954759597778, + "rewards/rejected": -1.990799903869629, + "step": 6150 + }, + { + "epoch": 0.8, + "eval_logits/chosen": -2.228811740875244, + "eval_logits/rejected": -2.23856782913208, + "eval_logps/chosen": -480.6499328613281, + "eval_logps/rejected": -492.998291015625, + "eval_loss": 0.604430079460144, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.479004979133606, + "eval_rewards/margins": 0.43987739086151123, + "eval_rewards/rejected": -1.9188824892044067, + "eval_runtime": 196.9241, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 6150 + }, + { + "epoch": 0.81, + "learning_rate": 5.508251187094932e-07, + "logits/chosen": -2.475147008895874, + "logits/rejected": -2.426905393600464, + "logps/chosen": -513.8081665039062, + "logps/rejected": -485.3460998535156, + "loss": 0.6665, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5422532558441162, + "rewards/margins": 0.36423978209495544, + "rewards/rejected": -1.9064929485321045, + "step": 6160 + }, + { + "epoch": 0.81, + "eval_logits/chosen": -2.230452299118042, + "eval_logits/rejected": -2.2402803897857666, + "eval_logps/chosen": -479.9612121582031, + "eval_logps/rejected": -492.21575927734375, + "eval_loss": 0.6041795611381531, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.4721179008483887, + "eval_rewards/margins": 0.4389396905899048, + "eval_rewards/rejected": -1.9110575914382935, + "eval_runtime": 196.9866, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.076, + "step": 6160 + }, + { + "epoch": 0.81, + "learning_rate": 5.436929429369122e-07, + "logits/chosen": -2.437342882156372, + "logits/rejected": -2.391582727432251, + "logps/chosen": -441.62054443359375, + "logps/rejected": -460.2799377441406, + "loss": 0.6219, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.4500336647033691, + "rewards/margins": 0.38196703791618347, + "rewards/rejected": -1.832000732421875, + "step": 6170 + }, + { + "epoch": 0.81, + "eval_logits/chosen": -2.2336361408233643, + "eval_logits/rejected": -2.2436013221740723, + "eval_logps/chosen": -478.6950378417969, + "eval_logps/rejected": -490.7254943847656, + "eval_loss": 0.6038507223129272, + "eval_rewards/accuracies": 0.671999990940094, + "eval_rewards/chosen": -1.459456205368042, + "eval_rewards/margins": 0.43669870495796204, + "eval_rewards/rejected": -1.8961549997329712, + "eval_runtime": 197.1892, + "eval_samples_per_second": 10.143, + "eval_steps_per_second": 5.071, + "step": 6170 + }, + { + "epoch": 0.81, + "learning_rate": 5.366016051465245e-07, + "logits/chosen": -2.488328695297241, + "logits/rejected": -2.4017763137817383, + "logps/chosen": -459.72076416015625, + "logps/rejected": -505.5301818847656, + "loss": 0.543, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.4137346744537354, + "rewards/margins": 0.6287944912910461, + "rewards/rejected": -2.042529344558716, + "step": 6180 + }, + { + "epoch": 0.81, + "eval_logits/chosen": -2.2341439723968506, + "eval_logits/rejected": -2.2442734241485596, + "eval_logps/chosen": -478.4734191894531, + "eval_logps/rejected": -490.54278564453125, + "eval_loss": 0.6037075519561768, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.4572402238845825, + "eval_rewards/margins": 0.43708717823028564, + "eval_rewards/rejected": -1.8943274021148682, + "eval_runtime": 197.0345, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 6180 + }, + { + "epoch": 0.81, + "learning_rate": 5.295512533702701e-07, + "logits/chosen": -2.417457103729248, + "logits/rejected": -2.3992388248443604, + "logps/chosen": -430.3199768066406, + "logps/rejected": -464.72943115234375, + "loss": 0.6159, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.4803340435028076, + "rewards/margins": 0.4091721177101135, + "rewards/rejected": -1.8895061016082764, + "step": 6190 + }, + { + "epoch": 0.81, + "eval_logits/chosen": -2.234912395477295, + "eval_logits/rejected": -2.245119571685791, + "eval_logps/chosen": -477.7791748046875, + "eval_logps/rejected": -489.7291259765625, + "eval_loss": 0.6039474010467529, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4502978324890137, + "eval_rewards/margins": 0.4358930289745331, + "eval_rewards/rejected": -1.8861908912658691, + "eval_runtime": 196.9988, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 6190 + }, + { + "epoch": 0.81, + "learning_rate": 5.225420347845023e-07, + "logits/chosen": -2.437502384185791, + "logits/rejected": -2.470454692840576, + "logps/chosen": -497.87188720703125, + "logps/rejected": -513.72802734375, + "loss": 0.6169, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.4202971458435059, + "rewards/margins": 0.4486163258552551, + "rewards/rejected": -1.8689134120941162, + "step": 6200 + }, + { + "epoch": 0.81, + "eval_logits/chosen": -2.236572265625, + "eval_logits/rejected": -2.2469632625579834, + "eval_logps/chosen": -476.685791015625, + "eval_logps/rejected": -488.4581604003906, + "eval_loss": 0.6037640571594238, + "eval_rewards/accuracies": 0.671999990940094, + "eval_rewards/chosen": -1.4393635988235474, + "eval_rewards/margins": 0.4341173768043518, + "eval_rewards/rejected": -1.8734811544418335, + "eval_runtime": 197.1174, + "eval_samples_per_second": 10.146, + "eval_steps_per_second": 5.073, + "step": 6200 + }, + { + "epoch": 0.81, + "learning_rate": 5.155740957069186e-07, + "logits/chosen": -2.591386318206787, + "logits/rejected": -2.545407772064209, + "logps/chosen": -494.3350524902344, + "logps/rejected": -491.04559326171875, + "loss": 0.6039, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5001957416534424, + "rewards/margins": 0.4530462324619293, + "rewards/rejected": -1.9532420635223389, + "step": 6210 + }, + { + "epoch": 0.81, + "eval_logits/chosen": -2.2379512786865234, + "eval_logits/rejected": -2.2482731342315674, + "eval_logps/chosen": -476.0990295410156, + "eval_logps/rejected": -487.8376770019531, + "eval_loss": 0.6033933162689209, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.4334958791732788, + "eval_rewards/margins": 0.4337805509567261, + "eval_rewards/rejected": -1.8672764301300049, + "eval_runtime": 196.993, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.076, + "step": 6210 + }, + { + "epoch": 0.81, + "learning_rate": 5.08647581593506e-07, + "logits/chosen": -2.3962807655334473, + "logits/rejected": -2.3699183464050293, + "logps/chosen": -445.7699279785156, + "logps/rejected": -480.8201599121094, + "loss": 0.5264, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.2488043308258057, + "rewards/margins": 0.5704221129417419, + "rewards/rejected": -1.8192262649536133, + "step": 6220 + }, + { + "epoch": 0.81, + "eval_logits/chosen": -2.2378056049346924, + "eval_logits/rejected": -2.248084545135498, + "eval_logps/chosen": -476.7023010253906, + "eval_logps/rejected": -488.5807800292969, + "eval_loss": 0.603471577167511, + "eval_rewards/accuracies": 0.6710000038146973, + "eval_rewards/chosen": -1.4395289421081543, + "eval_rewards/margins": 0.4351785182952881, + "eval_rewards/rejected": -1.8747072219848633, + "eval_runtime": 197.2627, + "eval_samples_per_second": 10.139, + "eval_steps_per_second": 5.069, + "step": 6220 + }, + { + "epoch": 0.82, + "learning_rate": 5.017626370355014e-07, + "logits/chosen": -2.4885125160217285, + "logits/rejected": -2.3865249156951904, + "logps/chosen": -464.68365478515625, + "logps/rejected": -473.9043884277344, + "loss": 0.5109, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.3580504655838013, + "rewards/margins": 0.6627442240715027, + "rewards/rejected": -2.020794630050659, + "step": 6230 + }, + { + "epoch": 0.82, + "eval_logits/chosen": -2.2380645275115967, + "eval_logits/rejected": -2.248126745223999, + "eval_logps/chosen": -478.20233154296875, + "eval_logps/rejected": -490.4144592285156, + "eval_loss": 0.6032126545906067, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4545294046401978, + "eval_rewards/margins": 0.4385150074958801, + "eval_rewards/rejected": -1.8930445909500122, + "eval_runtime": 197.1328, + "eval_samples_per_second": 10.145, + "eval_steps_per_second": 5.073, + "step": 6230 + }, + { + "epoch": 0.82, + "learning_rate": 4.949194057563783e-07, + "logits/chosen": -2.488008737564087, + "logits/rejected": -2.4647369384765625, + "logps/chosen": -487.2359313964844, + "logps/rejected": -460.8746643066406, + "loss": 0.643, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.485291838645935, + "rewards/margins": 0.36292168498039246, + "rewards/rejected": -1.84821355342865, + "step": 6240 + }, + { + "epoch": 0.82, + "eval_logits/chosen": -2.2388057708740234, + "eval_logits/rejected": -2.248603343963623, + "eval_logps/chosen": -478.5943298339844, + "eval_logps/rejected": -490.90087890625, + "eval_loss": 0.6029048562049866, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4584497213363647, + "eval_rewards/margins": 0.4394589364528656, + "eval_rewards/rejected": -1.8979085683822632, + "eval_runtime": 197.1294, + "eval_samples_per_second": 10.146, + "eval_steps_per_second": 5.073, + "step": 6240 + }, + { + "epoch": 0.82, + "learning_rate": 4.881180306088418e-07, + "logits/chosen": -2.4483964443206787, + "logits/rejected": -2.4352147579193115, + "logps/chosen": -464.33380126953125, + "logps/rejected": -473.46636962890625, + "loss": 0.5155, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -1.2545944452285767, + "rewards/margins": 0.7057239413261414, + "rewards/rejected": -1.9603185653686523, + "step": 6250 + }, + { + "epoch": 0.82, + "eval_logits/chosen": -2.2375144958496094, + "eval_logits/rejected": -2.24702787399292, + "eval_logps/chosen": -480.0223083496094, + "eval_logps/rejected": -492.4518127441406, + "eval_loss": 0.6030805706977844, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4727287292480469, + "eval_rewards/margins": 0.4406891465187073, + "eval_rewards/rejected": -1.913417935371399, + "eval_runtime": 196.9294, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 6250 + }, + { + "epoch": 0.82, + "learning_rate": 4.813586535718512e-07, + "logits/chosen": -2.433474540710449, + "logits/rejected": -2.376683473587036, + "logps/chosen": -516.4065551757812, + "logps/rejected": -486.9189453125, + "loss": 0.5705, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.4895950555801392, + "rewards/margins": 0.5969935655593872, + "rewards/rejected": -2.0865883827209473, + "step": 6260 + }, + { + "epoch": 0.82, + "eval_logits/chosen": -2.2347970008850098, + "eval_logits/rejected": -2.24385666847229, + "eval_logps/chosen": -481.6423645019531, + "eval_logps/rejected": -494.3458557128906, + "eval_loss": 0.6028019785881042, + "eval_rewards/accuracies": 0.675000011920929, + "eval_rewards/chosen": -1.4889296293258667, + "eval_rewards/margins": 0.44342872500419617, + "eval_rewards/rejected": -1.9323583841323853, + "eval_runtime": 196.7038, + "eval_samples_per_second": 10.168, + "eval_steps_per_second": 5.084, + "step": 6260 + }, + { + "epoch": 0.82, + "learning_rate": 4.746414157476506e-07, + "logits/chosen": -2.570890188217163, + "logits/rejected": -2.5058765411376953, + "logps/chosen": -441.581787109375, + "logps/rejected": -438.6575622558594, + "loss": 0.5888, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.4001985788345337, + "rewards/margins": 0.44795188307762146, + "rewards/rejected": -1.8481504917144775, + "step": 6270 + }, + { + "epoch": 0.82, + "eval_logits/chosen": -2.2354795932769775, + "eval_logits/rejected": -2.244324207305908, + "eval_logps/chosen": -481.95770263671875, + "eval_logps/rejected": -494.7315673828125, + "eval_loss": 0.6022074222564697, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4920825958251953, + "eval_rewards/margins": 0.4441326856613159, + "eval_rewards/rejected": -1.9362152814865112, + "eval_runtime": 196.9316, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 6270 + }, + { + "epoch": 0.82, + "learning_rate": 4.679664573588294e-07, + "logits/chosen": -2.394583225250244, + "logits/rejected": -2.3104095458984375, + "logps/chosen": -439.1087951660156, + "logps/rejected": -446.10858154296875, + "loss": 0.6121, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.452602744102478, + "rewards/margins": 0.41477838158607483, + "rewards/rejected": -1.8673810958862305, + "step": 6280 + }, + { + "epoch": 0.82, + "eval_logits/chosen": -2.237715482711792, + "eval_logits/rejected": -2.2464842796325684, + "eval_logps/chosen": -480.85943603515625, + "eval_logps/rejected": -493.50860595703125, + "eval_loss": 0.6019992828369141, + "eval_rewards/accuracies": 0.671500027179718, + "eval_rewards/chosen": -1.4811004400253296, + "eval_rewards/margins": 0.44288545846939087, + "eval_rewards/rejected": -1.9239858388900757, + "eval_runtime": 196.8947, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 6280 + }, + { + "epoch": 0.82, + "learning_rate": 4.6133391774538903e-07, + "logits/chosen": -2.5484490394592285, + "logits/rejected": -2.521597146987915, + "logps/chosen": -504.94378662109375, + "logps/rejected": -511.28369140625, + "loss": 0.5833, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.4214107990264893, + "rewards/margins": 0.6198548078536987, + "rewards/rejected": -2.0412654876708984, + "step": 6290 + }, + { + "epoch": 0.82, + "eval_logits/chosen": -2.2396035194396973, + "eval_logits/rejected": -2.2481977939605713, + "eval_logps/chosen": -479.93096923828125, + "eval_logps/rejected": -492.5154724121094, + "eval_loss": 0.6017520427703857, + "eval_rewards/accuracies": 0.675000011920929, + "eval_rewards/chosen": -1.4718154668807983, + "eval_rewards/margins": 0.44223955273628235, + "eval_rewards/rejected": -1.9140551090240479, + "eval_runtime": 196.9733, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 6290 + }, + { + "epoch": 0.82, + "learning_rate": 4.5474393536184214e-07, + "logits/chosen": -2.4809508323669434, + "logits/rejected": -2.4672088623046875, + "logps/chosen": -469.42193603515625, + "logps/rejected": -466.1731872558594, + "loss": 0.5904, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.4090373516082764, + "rewards/margins": 0.42221125960350037, + "rewards/rejected": -1.8312486410140991, + "step": 6300 + }, + { + "epoch": 0.82, + "eval_logits/chosen": -2.2400152683258057, + "eval_logits/rejected": -2.2484843730926514, + "eval_logps/chosen": -480.49237060546875, + "eval_logps/rejected": -493.15142822265625, + "eval_loss": 0.601513147354126, + "eval_rewards/accuracies": 0.6765000224113464, + "eval_rewards/chosen": -1.4774298667907715, + "eval_rewards/margins": 0.4429841935634613, + "eval_rewards/rejected": -1.9204140901565552, + "eval_runtime": 196.9799, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 6300 + }, + { + "epoch": 0.83, + "learning_rate": 4.4819664777431243e-07, + "logits/chosen": -2.3989458084106445, + "logits/rejected": -2.413045883178711, + "logps/chosen": -430.36669921875, + "logps/rejected": -429.37030029296875, + "loss": 0.6735, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.5417275428771973, + "rewards/margins": 0.24210628867149353, + "rewards/rejected": -1.7838338613510132, + "step": 6310 + }, + { + "epoch": 0.83, + "eval_logits/chosen": -2.2396247386932373, + "eval_logits/rejected": -2.2478537559509277, + "eval_logps/chosen": -480.34619140625, + "eval_logps/rejected": -492.9498291015625, + "eval_loss": 0.6015501022338867, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4759677648544312, + "eval_rewards/margins": 0.442430704832077, + "eval_rewards/rejected": -1.918398380279541, + "eval_runtime": 197.2452, + "eval_samples_per_second": 10.14, + "eval_steps_per_second": 5.07, + "step": 6310 + }, + { + "epoch": 0.83, + "learning_rate": 4.416921916576722e-07, + "logits/chosen": -2.370271921157837, + "logits/rejected": -2.306959629058838, + "logps/chosen": -523.9715576171875, + "logps/rejected": -542.676025390625, + "loss": 0.6187, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.5519903898239136, + "rewards/margins": 0.39413270354270935, + "rewards/rejected": -1.9461231231689453, + "step": 6320 + }, + { + "epoch": 0.83, + "eval_logits/chosen": -2.239846706390381, + "eval_logits/rejected": -2.2484591007232666, + "eval_logps/chosen": -479.931884765625, + "eval_logps/rejected": -492.4515075683594, + "eval_loss": 0.60145103931427, + "eval_rewards/accuracies": 0.6759999990463257, + "eval_rewards/chosen": -1.4718244075775146, + "eval_rewards/margins": 0.44159045815467834, + "eval_rewards/rejected": -1.9134151935577393, + "eval_runtime": 196.8848, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 6320 + }, + { + "epoch": 0.83, + "learning_rate": 4.352307027926828e-07, + "logits/chosen": -2.4330732822418213, + "logits/rejected": -2.4312150478363037, + "logps/chosen": -477.30023193359375, + "logps/rejected": -500.92828369140625, + "loss": 0.5178, + "rewards/accuracies": 0.8125, + "rewards/chosen": -1.4120477437973022, + "rewards/margins": 0.6749431490898132, + "rewards/rejected": -2.08699107170105, + "step": 6330 + }, + { + "epoch": 0.83, + "eval_logits/chosen": -2.2387287616729736, + "eval_logits/rejected": -2.24711537361145, + "eval_logps/chosen": -480.0934143066406, + "eval_logps/rejected": -492.5596618652344, + "eval_loss": 0.6014659404754639, + "eval_rewards/accuracies": 0.671999990940094, + "eval_rewards/chosen": -1.4734398126602173, + "eval_rewards/margins": 0.44105657935142517, + "eval_rewards/rejected": -1.9144963026046753, + "eval_runtime": 197.0557, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 6330 + }, + { + "epoch": 0.83, + "learning_rate": 4.288123160631624e-07, + "logits/chosen": -2.299553394317627, + "logits/rejected": -2.3260738849639893, + "logps/chosen": -446.32916259765625, + "logps/rejected": -465.8793029785156, + "loss": 0.6297, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.429883599281311, + "rewards/margins": 0.3902866244316101, + "rewards/rejected": -1.8201701641082764, + "step": 6340 + }, + { + "epoch": 0.83, + "eval_logits/chosen": -2.2386250495910645, + "eval_logits/rejected": -2.246933698654175, + "eval_logps/chosen": -479.9998474121094, + "eval_logps/rejected": -492.525634765625, + "eval_loss": 0.6013615727424622, + "eval_rewards/accuracies": 0.6769999861717224, + "eval_rewards/chosen": -1.4725043773651123, + "eval_rewards/margins": 0.44165146350860596, + "eval_rewards/rejected": -1.9141559600830078, + "eval_runtime": 197.1029, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.073, + "step": 6340 + }, + { + "epoch": 0.83, + "learning_rate": 4.224371654531731e-07, + "logits/chosen": -2.4214088916778564, + "logits/rejected": -2.4219307899475098, + "logps/chosen": -453.132080078125, + "logps/rejected": -448.65966796875, + "loss": 0.6506, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5275781154632568, + "rewards/margins": 0.310029536485672, + "rewards/rejected": -1.8376076221466064, + "step": 6350 + }, + { + "epoch": 0.83, + "eval_logits/chosen": -2.2398183345794678, + "eval_logits/rejected": -2.2483553886413574, + "eval_logps/chosen": -478.97662353515625, + "eval_logps/rejected": -491.418701171875, + "eval_loss": 0.6013292074203491, + "eval_rewards/accuracies": 0.675000011920929, + "eval_rewards/chosen": -1.4622721672058105, + "eval_rewards/margins": 0.4408148229122162, + "eval_rewards/rejected": -1.9030870199203491, + "eval_runtime": 196.9669, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 6350 + }, + { + "epoch": 0.83, + "learning_rate": 4.1610538404421837e-07, + "logits/chosen": -2.3899099826812744, + "logits/rejected": -2.4600508213043213, + "logps/chosen": -445.9964904785156, + "logps/rejected": -517.3856811523438, + "loss": 0.5739, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.4132336378097534, + "rewards/margins": 0.5214926600456238, + "rewards/rejected": -1.9347261190414429, + "step": 6360 + }, + { + "epoch": 0.83, + "eval_logits/chosen": -2.2410073280334473, + "eval_logits/rejected": -2.2497498989105225, + "eval_logps/chosen": -478.55841064453125, + "eval_logps/rejected": -490.9558410644531, + "eval_loss": 0.6013907790184021, + "eval_rewards/accuracies": 0.671999990940094, + "eval_rewards/chosen": -1.45809006690979, + "eval_rewards/margins": 0.44036784768104553, + "eval_rewards/rejected": -1.8984578847885132, + "eval_runtime": 197.0733, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.074, + "step": 6360 + }, + { + "epoch": 0.83, + "learning_rate": 4.098171040124699e-07, + "logits/chosen": -2.4912033081054688, + "logits/rejected": -2.441131114959717, + "logps/chosen": -545.6764526367188, + "logps/rejected": -493.4827575683594, + "loss": 0.6565, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.530342698097229, + "rewards/margins": 0.35169893503189087, + "rewards/rejected": -1.8820416927337646, + "step": 6370 + }, + { + "epoch": 0.83, + "eval_logits/chosen": -2.241713285446167, + "eval_logits/rejected": -2.250527858734131, + "eval_logps/chosen": -478.0467834472656, + "eval_logps/rejected": -490.4145812988281, + "eval_loss": 0.6011056303977966, + "eval_rewards/accuracies": 0.6765000224113464, + "eval_rewards/chosen": -1.45297372341156, + "eval_rewards/margins": 0.4400714933872223, + "eval_rewards/rejected": -1.89304518699646, + "eval_runtime": 196.8293, + "eval_samples_per_second": 10.161, + "eval_steps_per_second": 5.081, + "step": 6370 + }, + { + "epoch": 0.83, + "learning_rate": 4.03572456626006e-07, + "logits/chosen": -2.4287922382354736, + "logits/rejected": -2.431551694869995, + "logps/chosen": -479.798583984375, + "logps/rejected": -491.73321533203125, + "loss": 0.6382, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4386435747146606, + "rewards/margins": 0.33013081550598145, + "rewards/rejected": -1.7687742710113525, + "step": 6380 + }, + { + "epoch": 0.83, + "eval_logits/chosen": -2.2448699474334717, + "eval_logits/rejected": -2.2537577152252197, + "eval_logps/chosen": -476.6830139160156, + "eval_logps/rejected": -488.75482177734375, + "eval_loss": 0.6012539863586426, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.4393357038497925, + "eval_rewards/margins": 0.43711209297180176, + "eval_rewards/rejected": -1.8764480352401733, + "eval_runtime": 196.7731, + "eval_samples_per_second": 10.164, + "eval_steps_per_second": 5.082, + "step": 6380 + }, + { + "epoch": 0.84, + "learning_rate": 3.9737157224207265e-07, + "logits/chosen": -2.4541070461273193, + "logits/rejected": -2.4512617588043213, + "logps/chosen": -434.3753356933594, + "logps/rejected": -461.8311462402344, + "loss": 0.6057, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.3837672472000122, + "rewards/margins": 0.3907342553138733, + "rewards/rejected": -1.7745015621185303, + "step": 6390 + }, + { + "epoch": 0.84, + "eval_logits/chosen": -2.2441306114196777, + "eval_logits/rejected": -2.253051280975342, + "eval_logps/chosen": -476.2626037597656, + "eval_logps/rejected": -488.24658203125, + "eval_loss": 0.6012148857116699, + "eval_rewards/accuracies": 0.675000011920929, + "eval_rewards/chosen": -1.4351314306259155, + "eval_rewards/margins": 0.4362344443798065, + "eval_rewards/rejected": -1.8713661432266235, + "eval_runtime": 196.8681, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.08, + "step": 6390 + }, + { + "epoch": 0.84, + "learning_rate": 3.912145803043596e-07, + "logits/chosen": -2.4305484294891357, + "logits/rejected": -2.4521608352661133, + "logps/chosen": -497.2237243652344, + "logps/rejected": -483.7049255371094, + "loss": 0.6532, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.5712474584579468, + "rewards/margins": 0.24624311923980713, + "rewards/rejected": -1.817490816116333, + "step": 6400 + }, + { + "epoch": 0.84, + "eval_logits/chosen": -2.2431321144104004, + "eval_logits/rejected": -2.251950263977051, + "eval_logps/chosen": -476.27880859375, + "eval_logps/rejected": -488.2311706542969, + "eval_loss": 0.6009360551834106, + "eval_rewards/accuracies": 0.6754999756813049, + "eval_rewards/chosen": -1.4352940320968628, + "eval_rewards/margins": 0.4359172582626343, + "eval_rewards/rejected": -1.8712114095687866, + "eval_runtime": 197.2084, + "eval_samples_per_second": 10.142, + "eval_steps_per_second": 5.071, + "step": 6400 + }, + { + "epoch": 0.84, + "learning_rate": 3.851016093403023e-07, + "logits/chosen": -2.3944671154022217, + "logits/rejected": -2.3814704418182373, + "logps/chosen": -421.3961486816406, + "logps/rejected": -465.965087890625, + "loss": 0.5729, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.4689327478408813, + "rewards/margins": 0.5277568101882935, + "rewards/rejected": -1.9966895580291748, + "step": 6410 + }, + { + "epoch": 0.84, + "eval_logits/chosen": -2.2426493167877197, + "eval_logits/rejected": -2.251471519470215, + "eval_logps/chosen": -476.30645751953125, + "eval_logps/rejected": -488.2204895019531, + "eval_loss": 0.6010193824768066, + "eval_rewards/accuracies": 0.6754999756813049, + "eval_rewards/chosen": -1.4355709552764893, + "eval_rewards/margins": 0.4355340600013733, + "eval_rewards/rejected": -1.8711049556732178, + "eval_runtime": 196.8977, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 6410 + }, + { + "epoch": 0.84, + "learning_rate": 3.7903278695839456e-07, + "logits/chosen": -2.40942120552063, + "logits/rejected": -2.4272334575653076, + "logps/chosen": -461.953369140625, + "logps/rejected": -469.8707580566406, + "loss": 0.6151, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4102437496185303, + "rewards/margins": 0.3656821846961975, + "rewards/rejected": -1.775925874710083, + "step": 6420 + }, + { + "epoch": 0.84, + "eval_logits/chosen": -2.2420711517333984, + "eval_logits/rejected": -2.2507448196411133, + "eval_logps/chosen": -476.4427490234375, + "eval_logps/rejected": -488.3708801269531, + "eval_loss": 0.6009459495544434, + "eval_rewards/accuracies": 0.6759999990463257, + "eval_rewards/chosen": -1.436933159828186, + "eval_rewards/margins": 0.43567579984664917, + "eval_rewards/rejected": -1.8726087808609009, + "eval_runtime": 197.1777, + "eval_samples_per_second": 10.143, + "eval_steps_per_second": 5.072, + "step": 6420 + }, + { + "epoch": 0.84, + "learning_rate": 3.7300823984552983e-07, + "logits/chosen": -2.473325252532959, + "logits/rejected": -2.47148060798645, + "logps/chosen": -417.900146484375, + "logps/rejected": -481.50640869140625, + "loss": 0.5601, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.3513211011886597, + "rewards/margins": 0.47909989953041077, + "rewards/rejected": -1.8304208517074585, + "step": 6430 + }, + { + "epoch": 0.84, + "eval_logits/chosen": -2.240222930908203, + "eval_logits/rejected": -2.2487614154815674, + "eval_logps/chosen": -477.5064392089844, + "eval_logps/rejected": -489.5664367675781, + "eval_loss": 0.600739598274231, + "eval_rewards/accuracies": 0.675000011920929, + "eval_rewards/chosen": -1.4475706815719604, + "eval_rewards/margins": 0.4369937479496002, + "eval_rewards/rejected": -1.8845641613006592, + "eval_runtime": 197.1547, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 6430 + }, + { + "epoch": 0.84, + "learning_rate": 3.670280937643503e-07, + "logits/chosen": -2.3927805423736572, + "logits/rejected": -2.369868278503418, + "logps/chosen": -466.90692138671875, + "logps/rejected": -466.72296142578125, + "loss": 0.6153, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.4468450546264648, + "rewards/margins": 0.46017885208129883, + "rewards/rejected": -1.9070237874984741, + "step": 6440 + }, + { + "epoch": 0.84, + "eval_logits/chosen": -2.2400424480438232, + "eval_logits/rejected": -2.248690605163574, + "eval_logps/chosen": -478.0989074707031, + "eval_logps/rejected": -490.25750732421875, + "eval_loss": 0.6008526682853699, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.453494668006897, + "eval_rewards/margins": 0.4379802644252777, + "eval_rewards/rejected": -1.8914748430252075, + "eval_runtime": 196.9596, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 6440 + }, + { + "epoch": 0.84, + "learning_rate": 3.610924735506274e-07, + "logits/chosen": -2.4371469020843506, + "logits/rejected": -2.372954845428467, + "logps/chosen": -517.828125, + "logps/rejected": -467.57928466796875, + "loss": 0.6261, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.4241763353347778, + "rewards/margins": 0.3400752544403076, + "rewards/rejected": -1.764251470565796, + "step": 6450 + }, + { + "epoch": 0.84, + "eval_logits/chosen": -2.241787910461426, + "eval_logits/rejected": -2.2503960132598877, + "eval_logps/chosen": -477.5443115234375, + "eval_logps/rejected": -489.5791015625, + "eval_loss": 0.6006221771240234, + "eval_rewards/accuracies": 0.675000011920929, + "eval_rewards/chosen": -1.447948694229126, + "eval_rewards/margins": 0.4367419481277466, + "eval_rewards/rejected": -1.8846906423568726, + "eval_runtime": 196.8552, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 6450 + }, + { + "epoch": 0.85, + "learning_rate": 3.5520150311065316e-07, + "logits/chosen": -2.4051318168640137, + "logits/rejected": -2.383820056915283, + "logps/chosen": -488.4266662597656, + "logps/rejected": -504.89776611328125, + "loss": 0.5422, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.3867498636245728, + "rewards/margins": 0.5661884546279907, + "rewards/rejected": -1.9529380798339844, + "step": 6460 + }, + { + "epoch": 0.85, + "eval_logits/chosen": -2.2409005165100098, + "eval_logits/rejected": -2.2494056224823, + "eval_logps/chosen": -478.5350646972656, + "eval_logps/rejected": -490.6539611816406, + "eval_loss": 0.600521981716156, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4578566551208496, + "eval_rewards/margins": 0.4375828802585602, + "eval_rewards/rejected": -1.895439624786377, + "eval_runtime": 196.7864, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 5.082, + "step": 6460 + }, + { + "epoch": 0.85, + "learning_rate": 3.493553054186527e-07, + "logits/chosen": -2.449218273162842, + "logits/rejected": -2.4553260803222656, + "logps/chosen": -477.9852600097656, + "logps/rejected": -502.3858337402344, + "loss": 0.6368, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5308899879455566, + "rewards/margins": 0.34937649965286255, + "rewards/rejected": -1.880266785621643, + "step": 6470 + }, + { + "epoch": 0.85, + "eval_logits/chosen": -2.2385032176971436, + "eval_logits/rejected": -2.2466988563537598, + "eval_logps/chosen": -479.49285888671875, + "eval_logps/rejected": -491.6705627441406, + "eval_loss": 0.6005980372428894, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.4674347639083862, + "eval_rewards/margins": 0.4381706118583679, + "eval_rewards/rejected": -1.9056053161621094, + "eval_runtime": 197.1181, + "eval_samples_per_second": 10.146, + "eval_steps_per_second": 5.073, + "step": 6470 + }, + { + "epoch": 0.85, + "learning_rate": 3.4355400251421977e-07, + "logits/chosen": -2.3681087493896484, + "logits/rejected": -2.379730701446533, + "logps/chosen": -453.9375915527344, + "logps/rejected": -469.03369140625, + "loss": 0.6252, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.4286261796951294, + "rewards/margins": 0.4488712251186371, + "rewards/rejected": -1.8774973154067993, + "step": 6480 + }, + { + "epoch": 0.85, + "eval_logits/chosen": -2.2367665767669678, + "eval_logits/rejected": -2.2451555728912354, + "eval_logps/chosen": -480.6306457519531, + "eval_logps/rejected": -492.9149169921875, + "eval_loss": 0.6005855798721313, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.4788126945495605, + "eval_rewards/margins": 0.43923622369766235, + "eval_rewards/rejected": -1.9180489778518677, + "eval_runtime": 197.0243, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.076, + "step": 6480 + }, + { + "epoch": 0.85, + "learning_rate": 3.3779771549976637e-07, + "logits/chosen": -2.4080824851989746, + "logits/rejected": -2.3765482902526855, + "logps/chosen": -462.2020568847656, + "logps/rejected": -484.9317321777344, + "loss": 0.5981, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.5909029245376587, + "rewards/margins": 0.45654287934303284, + "rewards/rejected": -2.047445774078369, + "step": 6490 + }, + { + "epoch": 0.85, + "eval_logits/chosen": -2.2361011505126953, + "eval_logits/rejected": -2.2442541122436523, + "eval_logps/chosen": -481.1080322265625, + "eval_logps/rejected": -493.4491882324219, + "eval_loss": 0.6004220247268677, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4835866689682007, + "eval_rewards/margins": 0.4398048222064972, + "eval_rewards/rejected": -1.9233914613723755, + "eval_runtime": 196.7987, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 5.081, + "step": 6490 + }, + { + "epoch": 0.85, + "learning_rate": 3.3208656453799783e-07, + "logits/chosen": -2.4739370346069336, + "logits/rejected": -2.448183536529541, + "logps/chosen": -442.1339416503906, + "logps/rejected": -459.9366149902344, + "loss": 0.5529, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.3421974182128906, + "rewards/margins": 0.5246790647506714, + "rewards/rejected": -1.8668766021728516, + "step": 6500 + }, + { + "epoch": 0.85, + "eval_logits/chosen": -2.2351410388946533, + "eval_logits/rejected": -2.2432808876037598, + "eval_logps/chosen": -481.5740966796875, + "eval_logps/rejected": -493.9289855957031, + "eval_loss": 0.6004652380943298, + "eval_rewards/accuracies": 0.6754999756813049, + "eval_rewards/chosen": -1.4882471561431885, + "eval_rewards/margins": 0.4399425983428955, + "eval_rewards/rejected": -1.928189992904663, + "eval_runtime": 197.2487, + "eval_samples_per_second": 10.139, + "eval_steps_per_second": 5.07, + "step": 6500 + }, + { + "epoch": 0.85, + "learning_rate": 3.2642066884940064e-07, + "logits/chosen": -2.4060733318328857, + "logits/rejected": -2.4098830223083496, + "logps/chosen": -493.73419189453125, + "logps/rejected": -517.33056640625, + "loss": 0.6469, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.5257792472839355, + "rewards/margins": 0.3727341294288635, + "rewards/rejected": -1.8985134363174438, + "step": 6510 + }, + { + "epoch": 0.85, + "eval_logits/chosen": -2.234895944595337, + "eval_logits/rejected": -2.242981433868408, + "eval_logps/chosen": -481.7478942871094, + "eval_logps/rejected": -494.1064758300781, + "eval_loss": 0.600500226020813, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4899851083755493, + "eval_rewards/margins": 0.4399791359901428, + "eval_rewards/rejected": -1.9299641847610474, + "eval_runtime": 197.1585, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 6510 + }, + { + "epoch": 0.85, + "learning_rate": 3.2080014670975825e-07, + "logits/chosen": -2.5220677852630615, + "logits/rejected": -2.4988842010498047, + "logps/chosen": -455.0575256347656, + "logps/rejected": -451.1307067871094, + "loss": 0.6271, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.4372724294662476, + "rewards/margins": 0.333114355802536, + "rewards/rejected": -1.7703866958618164, + "step": 6520 + }, + { + "epoch": 0.85, + "eval_logits/chosen": -2.2341480255126953, + "eval_logits/rejected": -2.2423062324523926, + "eval_logps/chosen": -482.0383605957031, + "eval_logps/rejected": -494.4227294921875, + "eval_loss": 0.6005258560180664, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4928892850875854, + "eval_rewards/margins": 0.44023728370666504, + "eval_rewards/rejected": -1.933126449584961, + "eval_runtime": 197.2498, + "eval_samples_per_second": 10.139, + "eval_steps_per_second": 5.07, + "step": 6520 + }, + { + "epoch": 0.85, + "learning_rate": 3.152251154476765e-07, + "logits/chosen": -2.4268569946289062, + "logits/rejected": -2.4182865619659424, + "logps/chosen": -450.36834716796875, + "logps/rejected": -480.23565673828125, + "loss": 0.5816, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.4854668378829956, + "rewards/margins": 0.43363720178604126, + "rewards/rejected": -1.919103980064392, + "step": 6530 + }, + { + "epoch": 0.85, + "eval_logits/chosen": -2.234081506729126, + "eval_logits/rejected": -2.242432117462158, + "eval_logps/chosen": -482.54486083984375, + "eval_logps/rejected": -495.0234375, + "eval_loss": 0.6005407571792603, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.4979546070098877, + "eval_rewards/margins": 0.44117987155914307, + "eval_rewards/rejected": -1.9391344785690308, + "eval_runtime": 197.2391, + "eval_samples_per_second": 10.14, + "eval_steps_per_second": 5.07, + "step": 6530 + }, + { + "epoch": 0.86, + "learning_rate": 3.0969569144214147e-07, + "logits/chosen": -2.513247013092041, + "logits/rejected": -2.4432804584503174, + "logps/chosen": -486.681640625, + "logps/rejected": -488.5477600097656, + "loss": 0.561, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.462301254272461, + "rewards/margins": 0.48198261857032776, + "rewards/rejected": -1.9442840814590454, + "step": 6540 + }, + { + "epoch": 0.86, + "eval_logits/chosen": -2.2322230339050293, + "eval_logits/rejected": -2.240504741668701, + "eval_logps/chosen": -483.12017822265625, + "eval_logps/rejected": -495.6880187988281, + "eval_loss": 0.6006953120231628, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.5037076473236084, + "eval_rewards/margins": 0.4420722723007202, + "eval_rewards/rejected": -1.945779800415039, + "eval_runtime": 196.7754, + "eval_samples_per_second": 10.164, + "eval_steps_per_second": 5.082, + "step": 6540 + }, + { + "epoch": 0.86, + "learning_rate": 3.042119901200824e-07, + "logits/chosen": -2.3795104026794434, + "logits/rejected": -2.4172751903533936, + "logps/chosen": -436.779052734375, + "logps/rejected": -513.870849609375, + "loss": 0.5886, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.441890001296997, + "rewards/margins": 0.4301987588405609, + "rewards/rejected": -1.8720887899398804, + "step": 6550 + }, + { + "epoch": 0.86, + "eval_logits/chosen": -2.230128288269043, + "eval_logits/rejected": -2.2384142875671387, + "eval_logps/chosen": -483.65203857421875, + "eval_logps/rejected": -496.2925720214844, + "eval_loss": 0.6009081602096558, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.5090264081954956, + "eval_rewards/margins": 0.44279909133911133, + "eval_rewards/rejected": -1.9518253803253174, + "eval_runtime": 196.8398, + "eval_samples_per_second": 10.161, + "eval_steps_per_second": 5.08, + "step": 6550 + }, + { + "epoch": 0.86, + "learning_rate": 2.9877412595396726e-07, + "logits/chosen": -2.5197033882141113, + "logits/rejected": -2.546976089477539, + "logps/chosen": -532.2468872070312, + "logps/rejected": -527.4337158203125, + "loss": 0.6015, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.4535622596740723, + "rewards/margins": 0.48877015709877014, + "rewards/rejected": -1.9423322677612305, + "step": 6560 + }, + { + "epoch": 0.86, + "eval_logits/chosen": -2.2307028770446777, + "eval_logits/rejected": -2.238967180252075, + "eval_logps/chosen": -482.97442626953125, + "eval_logps/rejected": -495.572021484375, + "eval_loss": 0.6008643507957458, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.5022499561309814, + "eval_rewards/margins": 0.44237011671066284, + "eval_rewards/rejected": -1.9446200132369995, + "eval_runtime": 197.0423, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 6560 + }, + { + "epoch": 0.86, + "learning_rate": 2.933822124594124e-07, + "logits/chosen": -2.4213218688964844, + "logits/rejected": -2.343491792678833, + "logps/chosen": -468.56561279296875, + "logps/rejected": -463.643798828125, + "loss": 0.6231, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -1.473144769668579, + "rewards/margins": 0.37735602259635925, + "rewards/rejected": -1.8505008220672607, + "step": 6570 + }, + { + "epoch": 0.86, + "eval_logits/chosen": -2.232693910598755, + "eval_logits/rejected": -2.2410120964050293, + "eval_logps/chosen": -481.6788635253906, + "eval_logps/rejected": -494.13519287109375, + "eval_loss": 0.6006700396537781, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4892946481704712, + "eval_rewards/margins": 0.4409571588039398, + "eval_rewards/rejected": -1.9302517175674438, + "eval_runtime": 196.782, + "eval_samples_per_second": 10.164, + "eval_steps_per_second": 5.082, + "step": 6570 + }, + { + "epoch": 0.86, + "learning_rate": 2.880363621928106e-07, + "logits/chosen": -2.4120044708251953, + "logits/rejected": -2.3957927227020264, + "logps/chosen": -492.4474182128906, + "logps/rejected": -481.00347900390625, + "loss": 0.6087, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.5052903890609741, + "rewards/margins": 0.3901470899581909, + "rewards/rejected": -1.895437479019165, + "step": 6580 + }, + { + "epoch": 0.86, + "eval_logits/chosen": -2.233774423599243, + "eval_logits/rejected": -2.24210524559021, + "eval_logps/chosen": -480.93988037109375, + "eval_logps/rejected": -493.30426025390625, + "eval_loss": 0.6006296277046204, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4819048643112183, + "eval_rewards/margins": 0.4400372803211212, + "eval_rewards/rejected": -1.921942114830017, + "eval_runtime": 196.6703, + "eval_samples_per_second": 10.169, + "eval_steps_per_second": 5.085, + "step": 6580 + }, + { + "epoch": 0.86, + "learning_rate": 2.82736686748985e-07, + "logits/chosen": -2.4532999992370605, + "logits/rejected": -2.3916611671447754, + "logps/chosen": -489.4159240722656, + "logps/rejected": -455.03265380859375, + "loss": 0.5943, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.4414112567901611, + "rewards/margins": 0.4629778265953064, + "rewards/rejected": -1.9043890237808228, + "step": 6590 + }, + { + "epoch": 0.86, + "eval_logits/chosen": -2.235504388809204, + "eval_logits/rejected": -2.243652105331421, + "eval_logps/chosen": -481.0194396972656, + "eval_logps/rejected": -493.4203186035156, + "eval_loss": 0.6003859639167786, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4827001094818115, + "eval_rewards/margins": 0.4404028654098511, + "eval_rewards/rejected": -1.923102855682373, + "eval_runtime": 197.1197, + "eval_samples_per_second": 10.146, + "eval_steps_per_second": 5.073, + "step": 6590 + }, + { + "epoch": 0.86, + "learning_rate": 2.774832967588556e-07, + "logits/chosen": -2.450917959213257, + "logits/rejected": -2.4179370403289795, + "logps/chosen": -505.9139099121094, + "logps/rejected": -501.72265625, + "loss": 0.6125, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4615542888641357, + "rewards/margins": 0.38382774591445923, + "rewards/rejected": -1.8453820943832397, + "step": 6600 + }, + { + "epoch": 0.86, + "eval_logits/chosen": -2.2341389656066895, + "eval_logits/rejected": -2.242079496383667, + "eval_logps/chosen": -481.54388427734375, + "eval_logps/rejected": -493.9536437988281, + "eval_loss": 0.6004937887191772, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.4879448413848877, + "eval_rewards/margins": 0.4404914081096649, + "eval_rewards/rejected": -1.9284361600875854, + "eval_runtime": 197.0336, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 6600 + }, + { + "epoch": 0.86, + "learning_rate": 2.7227630188713326e-07, + "logits/chosen": -2.489640951156616, + "logits/rejected": -2.4417996406555176, + "logps/chosen": -524.0570068359375, + "logps/rejected": -503.34356689453125, + "loss": 0.6032, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.4702174663543701, + "rewards/margins": 0.5279144048690796, + "rewards/rejected": -1.9981319904327393, + "step": 6610 + }, + { + "epoch": 0.86, + "eval_logits/chosen": -2.235076665878296, + "eval_logits/rejected": -2.2431137561798096, + "eval_logps/chosen": -481.8366394042969, + "eval_logps/rejected": -494.28448486328125, + "eval_loss": 0.6003357172012329, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4908719062805176, + "eval_rewards/margins": 0.44087329506874084, + "eval_rewards/rejected": -1.9317452907562256, + "eval_runtime": 196.8034, + "eval_samples_per_second": 10.162, + "eval_steps_per_second": 5.081, + "step": 6610 + }, + { + "epoch": 0.87, + "learning_rate": 2.671158108300284e-07, + "logits/chosen": -2.5051331520080566, + "logits/rejected": -2.4897053241729736, + "logps/chosen": -482.470703125, + "logps/rejected": -524.2755126953125, + "loss": 0.6535, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.655645728111267, + "rewards/margins": 0.30718177556991577, + "rewards/rejected": -1.9628273248672485, + "step": 6620 + }, + { + "epoch": 0.87, + "eval_logits/chosen": -2.2355244159698486, + "eval_logits/rejected": -2.243473768234253, + "eval_logps/chosen": -482.01336669921875, + "eval_logps/rejected": -494.4171447753906, + "eval_loss": 0.6003087162971497, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.4926397800445557, + "eval_rewards/margins": 0.44043198227882385, + "eval_rewards/rejected": -1.9330717325210571, + "eval_runtime": 196.9961, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 6620 + }, + { + "epoch": 0.87, + "learning_rate": 2.6200193131298376e-07, + "logits/chosen": -2.515141010284424, + "logits/rejected": -2.5287601947784424, + "logps/chosen": -498.6051330566406, + "logps/rejected": -511.29669189453125, + "loss": 0.5795, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.4465413093566895, + "rewards/margins": 0.5301742553710938, + "rewards/rejected": -1.9767156839370728, + "step": 6630 + }, + { + "epoch": 0.87, + "eval_logits/chosen": -2.2372593879699707, + "eval_logits/rejected": -2.2451207637786865, + "eval_logps/chosen": -481.894775390625, + "eval_logps/rejected": -494.25543212890625, + "eval_loss": 0.5999908447265625, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.491453766822815, + "eval_rewards/margins": 0.44000041484832764, + "eval_rewards/rejected": -1.9314541816711426, + "eval_runtime": 197.0556, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 6630 + }, + { + "epoch": 0.87, + "learning_rate": 2.569347700884217e-07, + "logits/chosen": -2.476605176925659, + "logits/rejected": -2.4527993202209473, + "logps/chosen": -492.2688903808594, + "logps/rejected": -488.52581787109375, + "loss": 0.5202, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.4653801918029785, + "rewards/margins": 0.6056791543960571, + "rewards/rejected": -2.071059465408325, + "step": 6640 + }, + { + "epoch": 0.87, + "eval_logits/chosen": -2.2377192974090576, + "eval_logits/rejected": -2.2457220554351807, + "eval_logps/chosen": -482.15899658203125, + "eval_logps/rejected": -494.47711181640625, + "eval_loss": 0.6003398895263672, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.494095802307129, + "eval_rewards/margins": 0.4395754337310791, + "eval_rewards/rejected": -1.9336711168289185, + "eval_runtime": 196.9628, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 6640 + }, + { + "epoch": 0.87, + "learning_rate": 2.5191443293352186e-07, + "logits/chosen": -2.4760589599609375, + "logits/rejected": -2.4655823707580566, + "logps/chosen": -502.6334533691406, + "logps/rejected": -545.2277221679688, + "loss": 0.596, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.4817638397216797, + "rewards/margins": 0.5151968598365784, + "rewards/rejected": -1.9969608783721924, + "step": 6650 + }, + { + "epoch": 0.87, + "eval_logits/chosen": -2.2369654178619385, + "eval_logits/rejected": -2.244926929473877, + "eval_logps/chosen": -482.20147705078125, + "eval_logps/rejected": -494.5907287597656, + "eval_loss": 0.6002098321914673, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.494520902633667, + "eval_rewards/margins": 0.4402860999107361, + "eval_rewards/rejected": -1.9348070621490479, + "eval_runtime": 196.8284, + "eval_samples_per_second": 10.161, + "eval_steps_per_second": 5.081, + "step": 6650 + }, + { + "epoch": 0.87, + "learning_rate": 2.469410246480067e-07, + "logits/chosen": -2.4040045738220215, + "logits/rejected": -2.351503372192383, + "logps/chosen": -447.82080078125, + "logps/rejected": -474.35174560546875, + "loss": 0.5465, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.53019118309021, + "rewards/margins": 0.588119626045227, + "rewards/rejected": -2.1183109283447266, + "step": 6660 + }, + { + "epoch": 0.87, + "eval_logits/chosen": -2.2365779876708984, + "eval_logits/rejected": -2.2444467544555664, + "eval_logps/chosen": -482.3564758300781, + "eval_logps/rejected": -494.7445373535156, + "eval_loss": 0.6002839207649231, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.4960702657699585, + "eval_rewards/margins": 0.4402748942375183, + "eval_rewards/rejected": -1.9363453388214111, + "eval_runtime": 197.1637, + "eval_samples_per_second": 10.144, + "eval_steps_per_second": 5.072, + "step": 6660 + }, + { + "epoch": 0.87, + "learning_rate": 2.4201464905195955e-07, + "logits/chosen": -2.543325185775757, + "logits/rejected": -2.540952205657959, + "logps/chosen": -471.36322021484375, + "logps/rejected": -488.7928771972656, + "loss": 0.6745, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.5087602138519287, + "rewards/margins": 0.25316157937049866, + "rewards/rejected": -1.7619216442108154, + "step": 6670 + }, + { + "epoch": 0.87, + "eval_logits/chosen": -2.236558675765991, + "eval_logits/rejected": -2.244509696960449, + "eval_logps/chosen": -482.4366149902344, + "eval_logps/rejected": -494.8221435546875, + "eval_loss": 0.6003116965293884, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.496871829032898, + "eval_rewards/margins": 0.44024935364723206, + "eval_rewards/rejected": -1.9371213912963867, + "eval_runtime": 197.0333, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 6670 + }, + { + "epoch": 0.87, + "learning_rate": 2.3713540898365196e-07, + "logits/chosen": -2.4039158821105957, + "logits/rejected": -2.384819507598877, + "logps/chosen": -464.6339416503906, + "logps/rejected": -482.588623046875, + "loss": 0.5327, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.373579740524292, + "rewards/margins": 0.5761127471923828, + "rewards/rejected": -1.9496924877166748, + "step": 6680 + }, + { + "epoch": 0.87, + "eval_logits/chosen": -2.23722767829895, + "eval_logits/rejected": -2.2453808784484863, + "eval_logps/chosen": -482.1029052734375, + "eval_logps/rejected": -494.4505310058594, + "eval_loss": 0.6002626419067383, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.4935351610183716, + "eval_rewards/margins": 0.4398702085018158, + "eval_rewards/rejected": -1.9334051609039307, + "eval_runtime": 196.8241, + "eval_samples_per_second": 10.161, + "eval_steps_per_second": 5.081, + "step": 6680 + }, + { + "epoch": 0.88, + "learning_rate": 2.3230340629740166e-07, + "logits/chosen": -2.5268912315368652, + "logits/rejected": -2.479428291320801, + "logps/chosen": -470.13348388671875, + "logps/rejected": -472.2806701660156, + "loss": 0.6052, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.431485891342163, + "rewards/margins": 0.33059996366500854, + "rewards/rejected": -1.7620859146118164, + "step": 6690 + }, + { + "epoch": 0.88, + "eval_logits/chosen": -2.2347989082336426, + "eval_logits/rejected": -2.2428057193756104, + "eval_logps/chosen": -482.34716796875, + "eval_logps/rejected": -494.74395751953125, + "eval_loss": 0.6002459526062012, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4959776401519775, + "eval_rewards/margins": 0.44036149978637695, + "eval_rewards/rejected": -1.936339259147644, + "eval_runtime": 196.8663, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.08, + "step": 6690 + }, + { + "epoch": 0.88, + "learning_rate": 2.2751874186144357e-07, + "logits/chosen": -2.497739315032959, + "logits/rejected": -2.468701124191284, + "logps/chosen": -498.406005859375, + "logps/rejected": -471.77764892578125, + "loss": 0.6264, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.3730546236038208, + "rewards/margins": 0.34841588139533997, + "rewards/rejected": -1.7214704751968384, + "step": 6700 + }, + { + "epoch": 0.88, + "eval_logits/chosen": -2.235518217086792, + "eval_logits/rejected": -2.243518590927124, + "eval_logps/chosen": -482.25439453125, + "eval_logps/rejected": -494.6637878417969, + "eval_loss": 0.600059986114502, + "eval_rewards/accuracies": 0.671999990940094, + "eval_rewards/chosen": -1.4950499534606934, + "eval_rewards/margins": 0.44048792123794556, + "eval_rewards/rejected": -1.9355378150939941, + "eval_runtime": 197.1099, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.073, + "step": 6700 + }, + { + "epoch": 0.88, + "learning_rate": 2.227815155558241e-07, + "logits/chosen": -2.5343174934387207, + "logits/rejected": -2.5697665214538574, + "logps/chosen": -496.79345703125, + "logps/rejected": -521.640869140625, + "loss": 0.599, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.47087824344635, + "rewards/margins": 0.46131792664527893, + "rewards/rejected": -1.9321959018707275, + "step": 6710 + }, + { + "epoch": 0.88, + "eval_logits/chosen": -2.2363016605377197, + "eval_logits/rejected": -2.2442235946655273, + "eval_logps/chosen": -481.9609680175781, + "eval_logps/rejected": -494.30633544921875, + "eval_loss": 0.6001153588294983, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.4921154975891113, + "eval_rewards/margins": 0.4398481845855713, + "eval_rewards/rejected": -1.9319636821746826, + "eval_runtime": 196.858, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 6710 + }, + { + "epoch": 0.88, + "learning_rate": 2.1809182627031883e-07, + "logits/chosen": -2.5412440299987793, + "logits/rejected": -2.4768128395080566, + "logps/chosen": -498.4740295410156, + "logps/rejected": -514.843505859375, + "loss": 0.5517, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.4493227005004883, + "rewards/margins": 0.5438094139099121, + "rewards/rejected": -1.9931319952011108, + "step": 6720 + }, + { + "epoch": 0.88, + "eval_logits/chosen": -2.2361483573913574, + "eval_logits/rejected": -2.243964195251465, + "eval_logps/chosen": -482.1773681640625, + "eval_logps/rejected": -494.5125732421875, + "eval_loss": 0.6001316905021667, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.4942797422409058, + "eval_rewards/margins": 0.4397459924221039, + "eval_rewards/rejected": -1.934025764465332, + "eval_runtime": 196.9792, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 6720 + }, + { + "epoch": 0.88, + "learning_rate": 2.1344977190236372e-07, + "logits/chosen": -2.3600761890411377, + "logits/rejected": -2.283154249191284, + "logps/chosen": -441.1805725097656, + "logps/rejected": -480.2210998535156, + "loss": 0.6085, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.4329464435577393, + "rewards/margins": 0.3956843316555023, + "rewards/rejected": -1.828630805015564, + "step": 6730 + }, + { + "epoch": 0.88, + "eval_logits/chosen": -2.235147714614868, + "eval_logits/rejected": -2.242946147918701, + "eval_logps/chosen": -482.44781494140625, + "eval_logps/rejected": -494.79400634765625, + "eval_loss": 0.6002436876296997, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4969840049743652, + "eval_rewards/margins": 0.4398559629917145, + "eval_rewards/rejected": -1.9368400573730469, + "eval_runtime": 196.9267, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 6730 + }, + { + "epoch": 0.88, + "learning_rate": 2.0885544935501656e-07, + "logits/chosen": -2.467778444290161, + "logits/rejected": -2.52734637260437, + "logps/chosen": -444.9013671875, + "logps/rejected": -490.29522705078125, + "loss": 0.5446, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.3631761074066162, + "rewards/margins": 0.5013788342475891, + "rewards/rejected": -1.86455500125885, + "step": 6740 + }, + { + "epoch": 0.88, + "eval_logits/chosen": -2.2349860668182373, + "eval_logits/rejected": -2.2425625324249268, + "eval_logps/chosen": -482.5580749511719, + "eval_logps/rejected": -494.9825134277344, + "eval_loss": 0.6000815629959106, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4980865716934204, + "eval_rewards/margins": 0.4406384229660034, + "eval_rewards/rejected": -1.9387251138687134, + "eval_runtime": 197.4709, + "eval_samples_per_second": 10.128, + "eval_steps_per_second": 5.064, + "step": 6740 + }, + { + "epoch": 0.88, + "learning_rate": 2.0430895453492944e-07, + "logits/chosen": -2.446242570877075, + "logits/rejected": -2.4823126792907715, + "logps/chosen": -528.4280395507812, + "logps/rejected": -505.59527587890625, + "loss": 0.6626, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.5998499393463135, + "rewards/margins": 0.24394333362579346, + "rewards/rejected": -1.843793511390686, + "step": 6750 + }, + { + "epoch": 0.88, + "eval_logits/chosen": -2.2358670234680176, + "eval_logits/rejected": -2.2437267303466797, + "eval_logps/chosen": -482.26654052734375, + "eval_logps/rejected": -494.6257629394531, + "eval_loss": 0.6001067757606506, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.4951714277267456, + "eval_rewards/margins": 0.4399857223033905, + "eval_rewards/rejected": -1.9351569414138794, + "eval_runtime": 197.0616, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 6750 + }, + { + "epoch": 0.88, + "learning_rate": 1.9981038235035111e-07, + "logits/chosen": -2.442606210708618, + "logits/rejected": -2.4387025833129883, + "logps/chosen": -446.9200744628906, + "logps/rejected": -488.510009765625, + "loss": 0.5305, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -1.3141353130340576, + "rewards/margins": 0.5878725051879883, + "rewards/rejected": -1.9020076990127563, + "step": 6760 + }, + { + "epoch": 0.88, + "eval_logits/chosen": -2.2357892990112305, + "eval_logits/rejected": -2.2436020374298096, + "eval_logps/chosen": -482.08953857421875, + "eval_logps/rejected": -494.4461669921875, + "eval_loss": 0.60005122423172, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4934011697769165, + "eval_rewards/margins": 0.43996042013168335, + "eval_rewards/rejected": -1.933361530303955, + "eval_runtime": 196.9804, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 6760 + }, + { + "epoch": 0.89, + "learning_rate": 1.9535982670914112e-07, + "logits/chosen": -2.3814468383789062, + "logits/rejected": -2.3742775917053223, + "logps/chosen": -506.7943420410156, + "logps/rejected": -512.1139526367188, + "loss": 0.5956, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.455214262008667, + "rewards/margins": 0.4944564700126648, + "rewards/rejected": -1.9496707916259766, + "step": 6770 + }, + { + "epoch": 0.89, + "eval_logits/chosen": -2.236239433288574, + "eval_logits/rejected": -2.244074821472168, + "eval_logps/chosen": -481.68572998046875, + "eval_logps/rejected": -493.983154296875, + "eval_loss": 0.6000664234161377, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4893630743026733, + "eval_rewards/margins": 0.4393681585788727, + "eval_rewards/rejected": -1.9287313222885132, + "eval_runtime": 196.9554, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 6770 + }, + { + "epoch": 0.89, + "learning_rate": 1.9095738051681412e-07, + "logits/chosen": -2.392882823944092, + "logits/rejected": -2.3796443939208984, + "logps/chosen": -444.6842346191406, + "logps/rejected": -494.61102294921875, + "loss": 0.5703, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.58041250705719, + "rewards/margins": 0.47645503282546997, + "rewards/rejected": -2.0568675994873047, + "step": 6780 + }, + { + "epoch": 0.89, + "eval_logits/chosen": -2.2347044944763184, + "eval_logits/rejected": -2.2426021099090576, + "eval_logps/chosen": -481.5654296875, + "eval_logps/rejected": -493.9347839355469, + "eval_loss": 0.6000974178314209, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.4881603717803955, + "eval_rewards/margins": 0.4400874078273773, + "eval_rewards/rejected": -1.9282478094100952, + "eval_runtime": 197.2369, + "eval_samples_per_second": 10.14, + "eval_steps_per_second": 5.07, + "step": 6780 + }, + { + "epoch": 0.89, + "learning_rate": 1.8660313567459703e-07, + "logits/chosen": -2.4689860343933105, + "logits/rejected": -2.507202386856079, + "logps/chosen": -423.113037109375, + "logps/rejected": -489.84747314453125, + "loss": 0.519, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.3348274230957031, + "rewards/margins": 0.7304555177688599, + "rewards/rejected": -2.0652830600738525, + "step": 6790 + }, + { + "epoch": 0.89, + "eval_logits/chosen": -2.234759569168091, + "eval_logits/rejected": -2.242676019668579, + "eval_logps/chosen": -481.4300842285156, + "eval_logps/rejected": -493.7626953125, + "eval_loss": 0.6002135276794434, + "eval_rewards/accuracies": 0.675000011920929, + "eval_rewards/chosen": -1.486806869506836, + "eval_rewards/margins": 0.43971991539001465, + "eval_rewards/rejected": -1.9265269041061401, + "eval_runtime": 196.7938, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 5.081, + "step": 6790 + }, + { + "epoch": 0.89, + "learning_rate": 1.8229718307751165e-07, + "logits/chosen": -2.5024523735046387, + "logits/rejected": -2.4301934242248535, + "logps/chosen": -508.9176330566406, + "logps/rejected": -503.2557678222656, + "loss": 0.5557, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.5061525106430054, + "rewards/margins": 0.6268715858459473, + "rewards/rejected": -2.133024215698242, + "step": 6800 + }, + { + "epoch": 0.89, + "eval_logits/chosen": -2.235121011734009, + "eval_logits/rejected": -2.243008852005005, + "eval_logps/chosen": -480.85699462890625, + "eval_logps/rejected": -493.17816162109375, + "eval_loss": 0.6001080274581909, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.4810760021209717, + "eval_rewards/margins": 0.43960532546043396, + "eval_rewards/rejected": -1.920681118965149, + "eval_runtime": 196.9579, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 6800 + }, + { + "epoch": 0.89, + "learning_rate": 1.7803961261247864e-07, + "logits/chosen": -2.397812604904175, + "logits/rejected": -2.4298148155212402, + "logps/chosen": -493.19952392578125, + "logps/rejected": -522.0126953125, + "loss": 0.5928, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.461112380027771, + "rewards/margins": 0.4707748293876648, + "rewards/rejected": -1.9318872690200806, + "step": 6810 + }, + { + "epoch": 0.89, + "eval_logits/chosen": -2.2350032329559326, + "eval_logits/rejected": -2.242875576019287, + "eval_logps/chosen": -480.5160217285156, + "eval_logps/rejected": -492.8398132324219, + "eval_loss": 0.6000543236732483, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4776662588119507, + "eval_rewards/margins": 0.43963193893432617, + "eval_rewards/rejected": -1.9172983169555664, + "eval_runtime": 197.0366, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 6810 + }, + { + "epoch": 0.89, + "learning_rate": 1.7383051315643772e-07, + "logits/chosen": -2.451185703277588, + "logits/rejected": -2.4309628009796143, + "logps/chosen": -506.1298828125, + "logps/rejected": -492.93927001953125, + "loss": 0.6184, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.559695839881897, + "rewards/margins": 0.43002885580062866, + "rewards/rejected": -1.9897247552871704, + "step": 6820 + }, + { + "epoch": 0.89, + "eval_logits/chosen": -2.2355458736419678, + "eval_logits/rejected": -2.243511199951172, + "eval_logps/chosen": -480.4143981933594, + "eval_logps/rejected": -492.6844482421875, + "eval_loss": 0.600212037563324, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4766501188278198, + "eval_rewards/margins": 0.4390944242477417, + "eval_rewards/rejected": -1.9157445430755615, + "eval_runtime": 197.0764, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 6820 + }, + { + "epoch": 0.89, + "learning_rate": 1.6966997257449685e-07, + "logits/chosen": -2.4615304470062256, + "logits/rejected": -2.423633098602295, + "logps/chosen": -487.74188232421875, + "logps/rejected": -501.6026306152344, + "loss": 0.6065, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.4967652559280396, + "rewards/margins": 0.42426905035972595, + "rewards/rejected": -1.9210344552993774, + "step": 6830 + }, + { + "epoch": 0.89, + "eval_logits/chosen": -2.2352402210235596, + "eval_logits/rejected": -2.2431232929229736, + "eval_logps/chosen": -480.4826354980469, + "eval_logps/rejected": -492.7511901855469, + "eval_loss": 0.6002153754234314, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.4773321151733398, + "eval_rewards/margins": 0.4390796720981598, + "eval_rewards/rejected": -1.9164117574691772, + "eval_runtime": 196.8292, + "eval_samples_per_second": 10.161, + "eval_steps_per_second": 5.081, + "step": 6830 + }, + { + "epoch": 0.9, + "learning_rate": 1.6555807771809375e-07, + "logits/chosen": -2.443737506866455, + "logits/rejected": -2.424933910369873, + "logps/chosen": -455.24761962890625, + "logps/rejected": -443.237060546875, + "loss": 0.5943, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.4251893758773804, + "rewards/margins": 0.5287860035896301, + "rewards/rejected": -1.9539753198623657, + "step": 6840 + }, + { + "epoch": 0.9, + "eval_logits/chosen": -2.2357749938964844, + "eval_logits/rejected": -2.2437076568603516, + "eval_logps/chosen": -480.072021484375, + "eval_logps/rejected": -492.3343811035156, + "eval_loss": 0.5999860763549805, + "eval_rewards/accuracies": 0.675000011920929, + "eval_rewards/chosen": -1.4732260704040527, + "eval_rewards/margins": 0.43901708722114563, + "eval_rewards/rejected": -1.912243127822876, + "eval_runtime": 197.0688, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.074, + "step": 6840 + }, + { + "epoch": 0.9, + "learning_rate": 1.6149491442318617e-07, + "logits/chosen": -2.4913601875305176, + "logits/rejected": -2.4751856327056885, + "logps/chosen": -459.0533752441406, + "logps/rejected": -494.43646240234375, + "loss": 0.6122, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.4198737144470215, + "rewards/margins": 0.444546639919281, + "rewards/rejected": -1.8644202947616577, + "step": 6850 + }, + { + "epoch": 0.9, + "eval_logits/chosen": -2.2358829975128174, + "eval_logits/rejected": -2.2437386512756348, + "eval_logps/chosen": -479.94873046875, + "eval_logps/rejected": -492.1989440917969, + "eval_loss": 0.5999601483345032, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.4719932079315186, + "eval_rewards/margins": 0.43889597058296204, + "eval_rewards/rejected": -1.9108891487121582, + "eval_runtime": 196.9527, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 6850 + }, + { + "epoch": 0.9, + "learning_rate": 1.5748056750845786e-07, + "logits/chosen": -2.4793450832366943, + "logits/rejected": -2.4470067024230957, + "logps/chosen": -486.44891357421875, + "logps/rejected": -461.64312744140625, + "loss": 0.5781, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.5266351699829102, + "rewards/margins": 0.48350292444229126, + "rewards/rejected": -2.0101380348205566, + "step": 6860 + }, + { + "epoch": 0.9, + "eval_logits/chosen": -2.2366299629211426, + "eval_logits/rejected": -2.2447266578674316, + "eval_logps/chosen": -479.8468017578125, + "eval_logps/rejected": -492.0443420410156, + "eval_loss": 0.6001272201538086, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.4709739685058594, + "eval_rewards/margins": 0.4383690655231476, + "eval_rewards/rejected": -1.9093430042266846, + "eval_runtime": 196.8817, + "eval_samples_per_second": 10.158, + "eval_steps_per_second": 5.079, + "step": 6860 + }, + { + "epoch": 0.9, + "learning_rate": 1.5351512077355024e-07, + "logits/chosen": -2.428464412689209, + "logits/rejected": -2.386335849761963, + "logps/chosen": -524.8412475585938, + "logps/rejected": -589.8968505859375, + "loss": 0.5874, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.4802411794662476, + "rewards/margins": 0.5046831965446472, + "rewards/rejected": -1.984924554824829, + "step": 6870 + }, + { + "epoch": 0.9, + "eval_logits/chosen": -2.2367382049560547, + "eval_logits/rejected": -2.244694471359253, + "eval_logps/chosen": -480.0219421386719, + "eval_logps/rejected": -492.270263671875, + "eval_loss": 0.5999786853790283, + "eval_rewards/accuracies": 0.675000011920929, + "eval_rewards/chosen": -1.47272527217865, + "eval_rewards/margins": 0.4388763904571533, + "eval_rewards/rejected": -1.9116017818450928, + "eval_runtime": 197.036, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 6870 + }, + { + "epoch": 0.9, + "learning_rate": 1.4959865699730902e-07, + "logits/chosen": -2.414353847503662, + "logits/rejected": -2.3764188289642334, + "logps/chosen": -447.9195861816406, + "logps/rejected": -450.7298889160156, + "loss": 0.5447, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.5458955764770508, + "rewards/margins": 0.5172951817512512, + "rewards/rejected": -2.0631909370422363, + "step": 6880 + }, + { + "epoch": 0.9, + "eval_logits/chosen": -2.236314058303833, + "eval_logits/rejected": -2.244189977645874, + "eval_logps/chosen": -480.1126708984375, + "eval_logps/rejected": -492.3921203613281, + "eval_loss": 0.5999928116798401, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.4736328125, + "eval_rewards/margins": 0.43918824195861816, + "eval_rewards/rejected": -1.9128209352493286, + "eval_runtime": 196.9069, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 6880 + }, + { + "epoch": 0.9, + "learning_rate": 1.4573125793606202e-07, + "logits/chosen": -2.4773974418640137, + "logits/rejected": -2.478883743286133, + "logps/chosen": -425.7857360839844, + "logps/rejected": -457.80084228515625, + "loss": 0.6112, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -1.4574798345565796, + "rewards/margins": 0.476410448551178, + "rewards/rejected": -1.9338905811309814, + "step": 6890 + }, + { + "epoch": 0.9, + "eval_logits/chosen": -2.2367420196533203, + "eval_logits/rejected": -2.2447338104248047, + "eval_logps/chosen": -480.1895751953125, + "eval_logps/rejected": -492.44073486328125, + "eval_loss": 0.5999522805213928, + "eval_rewards/accuracies": 0.6759999990463257, + "eval_rewards/chosen": -1.4744013547897339, + "eval_rewards/margins": 0.43890616297721863, + "eval_rewards/rejected": -1.913307547569275, + "eval_runtime": 196.8113, + "eval_samples_per_second": 10.162, + "eval_steps_per_second": 5.081, + "step": 6890 + }, + { + "epoch": 0.9, + "learning_rate": 1.4191300432190634e-07, + "logits/chosen": -2.407351016998291, + "logits/rejected": -2.36082124710083, + "logps/chosen": -492.70574951171875, + "logps/rejected": -501.9463806152344, + "loss": 0.6134, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.5695149898529053, + "rewards/margins": 0.4132605195045471, + "rewards/rejected": -1.9827754497528076, + "step": 6900 + }, + { + "epoch": 0.9, + "eval_logits/chosen": -2.2375595569610596, + "eval_logits/rejected": -2.245429754257202, + "eval_logps/chosen": -479.95867919921875, + "eval_logps/rejected": -492.1669616699219, + "eval_loss": 0.5999838709831238, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4720933437347412, + "eval_rewards/margins": 0.43847644329071045, + "eval_rewards/rejected": -1.910569667816162, + "eval_runtime": 196.8622, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.08, + "step": 6900 + }, + { + "epoch": 0.9, + "learning_rate": 1.381439758610284e-07, + "logits/chosen": -2.4294683933258057, + "logits/rejected": -2.388927936553955, + "logps/chosen": -458.1944274902344, + "logps/rejected": -468.96124267578125, + "loss": 0.5998, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.3803436756134033, + "rewards/margins": 0.33844703435897827, + "rewards/rejected": -1.7187906503677368, + "step": 6910 + }, + { + "epoch": 0.9, + "eval_logits/chosen": -2.2376015186309814, + "eval_logits/rejected": -2.245645523071289, + "eval_logps/chosen": -480.1687927246094, + "eval_logps/rejected": -492.4361267089844, + "eval_loss": 0.5998128652572632, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.4741934537887573, + "eval_rewards/margins": 0.4390679597854614, + "eval_rewards/rejected": -1.9132615327835083, + "eval_runtime": 197.0022, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 6910 + }, + { + "epoch": 0.91, + "learning_rate": 1.3442425123203596e-07, + "logits/chosen": -2.542816638946533, + "logits/rejected": -2.5520670413970947, + "logps/chosen": -458.94775390625, + "logps/rejected": -500.8182678222656, + "loss": 0.5518, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.4125124216079712, + "rewards/margins": 0.5205521583557129, + "rewards/rejected": -1.9330646991729736, + "step": 6920 + }, + { + "epoch": 0.91, + "eval_logits/chosen": -2.2370002269744873, + "eval_logits/rejected": -2.244837999343872, + "eval_logps/chosen": -480.6543884277344, + "eval_logps/rejected": -492.9961853027344, + "eval_loss": 0.5997794270515442, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4790493249893188, + "eval_rewards/margins": 0.4398118257522583, + "eval_rewards/rejected": -1.9188611507415771, + "eval_runtime": 196.9578, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 6920 + }, + { + "epoch": 0.91, + "learning_rate": 1.3075390808431897e-07, + "logits/chosen": -2.33107328414917, + "logits/rejected": -2.374955654144287, + "logps/chosen": -438.92376708984375, + "logps/rejected": -458.869873046875, + "loss": 0.5815, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.4029204845428467, + "rewards/margins": 0.5280667543411255, + "rewards/rejected": -1.9309873580932617, + "step": 6930 + }, + { + "epoch": 0.91, + "eval_logits/chosen": -2.2373814582824707, + "eval_logits/rejected": -2.2452640533447266, + "eval_logps/chosen": -480.60675048828125, + "eval_logps/rejected": -492.9164733886719, + "eval_loss": 0.5998906493186951, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4785739183425903, + "eval_rewards/margins": 0.43949049711227417, + "eval_rewards/rejected": -1.9180644750595093, + "eval_runtime": 197.0525, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 6930 + }, + { + "epoch": 0.91, + "learning_rate": 1.271330230364262e-07, + "logits/chosen": -2.484471559524536, + "logits/rejected": -2.485959529876709, + "logps/chosen": -447.9771423339844, + "logps/rejected": -548.21630859375, + "loss": 0.5728, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.4104546308517456, + "rewards/margins": 0.5502049326896667, + "rewards/rejected": -1.9606596231460571, + "step": 6940 + }, + { + "epoch": 0.91, + "eval_logits/chosen": -2.237180709838867, + "eval_logits/rejected": -2.2449331283569336, + "eval_logps/chosen": -480.6366271972656, + "eval_logps/rejected": -492.9146728515625, + "eval_loss": 0.600125253200531, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.478872299194336, + "eval_rewards/margins": 0.4391743242740631, + "eval_rewards/rejected": -1.9180465936660767, + "eval_runtime": 196.7765, + "eval_samples_per_second": 10.164, + "eval_steps_per_second": 5.082, + "step": 6940 + }, + { + "epoch": 0.91, + "learning_rate": 1.2356167167446698e-07, + "logits/chosen": -2.468034029006958, + "logits/rejected": -2.458634853363037, + "logps/chosen": -452.5840759277344, + "logps/rejected": -506.529541015625, + "loss": 0.6253, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5923188924789429, + "rewards/margins": 0.39848339557647705, + "rewards/rejected": -1.9908021688461304, + "step": 6950 + }, + { + "epoch": 0.91, + "eval_logits/chosen": -2.2377233505249023, + "eval_logits/rejected": -2.245729446411133, + "eval_logps/chosen": -480.3329772949219, + "eval_logps/rejected": -492.6123046875, + "eval_loss": 0.5999380946159363, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.4758356809616089, + "eval_rewards/margins": 0.43918731808662415, + "eval_rewards/rejected": -1.9150229692459106, + "eval_runtime": 196.8314, + "eval_samples_per_second": 10.161, + "eval_steps_per_second": 5.08, + "step": 6950 + }, + { + "epoch": 0.91, + "learning_rate": 1.2003992855053326e-07, + "logits/chosen": -2.441638231277466, + "logits/rejected": -2.3864188194274902, + "logps/chosen": -437.62322998046875, + "logps/rejected": -482.9440002441406, + "loss": 0.5998, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.4483708143234253, + "rewards/margins": 0.5788255929946899, + "rewards/rejected": -2.0271964073181152, + "step": 6960 + }, + { + "epoch": 0.91, + "eval_logits/chosen": -2.2372143268585205, + "eval_logits/rejected": -2.245234966278076, + "eval_logps/chosen": -480.1387634277344, + "eval_logps/rejected": -492.3878173828125, + "eval_loss": 0.5998957753181458, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.473893404006958, + "eval_rewards/margins": 0.4388843774795532, + "eval_rewards/rejected": -1.9127776622772217, + "eval_runtime": 196.9899, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.076, + "step": 6960 + }, + { + "epoch": 0.91, + "learning_rate": 1.1656786718114239e-07, + "logits/chosen": -2.410566806793213, + "logits/rejected": -2.415010690689087, + "logps/chosen": -461.14923095703125, + "logps/rejected": -489.13330078125, + "loss": 0.6105, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.4670493602752686, + "rewards/margins": 0.4196097254753113, + "rewards/rejected": -1.886659026145935, + "step": 6970 + }, + { + "epoch": 0.91, + "eval_logits/chosen": -2.2372076511383057, + "eval_logits/rejected": -2.2453556060791016, + "eval_logps/chosen": -479.9434509277344, + "eval_logps/rejected": -492.099365234375, + "eval_loss": 0.6001380681991577, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4719403982162476, + "eval_rewards/margins": 0.43795305490493774, + "eval_rewards/rejected": -1.909893274307251, + "eval_runtime": 197.05, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 6970 + }, + { + "epoch": 0.91, + "learning_rate": 1.1314556004570487e-07, + "logits/chosen": -2.394918918609619, + "logits/rejected": -2.4230122566223145, + "logps/chosen": -413.9336853027344, + "logps/rejected": -471.86785888671875, + "loss": 0.6255, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.4994906187057495, + "rewards/margins": 0.32537388801574707, + "rewards/rejected": -1.824864387512207, + "step": 6980 + }, + { + "epoch": 0.91, + "eval_logits/chosen": -2.2366092205047607, + "eval_logits/rejected": -2.2446200847625732, + "eval_logps/chosen": -480.2217102050781, + "eval_logps/rejected": -492.47747802734375, + "eval_loss": 0.6001001000404358, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.474722981452942, + "eval_rewards/margins": 0.4389515519142151, + "eval_rewards/rejected": -1.9136745929718018, + "eval_runtime": 197.1272, + "eval_samples_per_second": 10.146, + "eval_steps_per_second": 5.073, + "step": 6980 + }, + { + "epoch": 0.91, + "learning_rate": 1.0977307858500818e-07, + "logits/chosen": -2.392697811126709, + "logits/rejected": -2.3592922687530518, + "logps/chosen": -450.70721435546875, + "logps/rejected": -447.18206787109375, + "loss": 0.5663, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.3413830995559692, + "rewards/margins": 0.42927223443984985, + "rewards/rejected": -1.7706553936004639, + "step": 6990 + }, + { + "epoch": 0.91, + "eval_logits/chosen": -2.237020969390869, + "eval_logits/rejected": -2.244997024536133, + "eval_logps/chosen": -480.3205261230469, + "eval_logps/rejected": -492.5836181640625, + "eval_loss": 0.6000609993934631, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.4757108688354492, + "eval_rewards/margins": 0.43902501463890076, + "eval_rewards/rejected": -1.9147359132766724, + "eval_runtime": 196.8134, + "eval_samples_per_second": 10.162, + "eval_steps_per_second": 5.081, + "step": 6990 + }, + { + "epoch": 0.92, + "learning_rate": 1.0645049319972789e-07, + "logits/chosen": -2.440504550933838, + "logits/rejected": -2.380981922149658, + "logps/chosen": -461.13299560546875, + "logps/rejected": -475.28173828125, + "loss": 0.5424, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.4061723947525024, + "rewards/margins": 0.6738840937614441, + "rewards/rejected": -2.080056667327881, + "step": 7000 + }, + { + "epoch": 0.92, + "eval_logits/chosen": -2.237301826477051, + "eval_logits/rejected": -2.2453017234802246, + "eval_logps/chosen": -480.3714904785156, + "eval_logps/rejected": -492.61669921875, + "eval_loss": 0.600178599357605, + "eval_rewards/accuracies": 0.671999990940094, + "eval_rewards/chosen": -1.4762206077575684, + "eval_rewards/margins": 0.4388462007045746, + "eval_rewards/rejected": -1.9150665998458862, + "eval_runtime": 196.9518, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 7000 + }, + { + "epoch": 0.92, + "learning_rate": 1.0317787324895634e-07, + "logits/chosen": -2.4781394004821777, + "logits/rejected": -2.4770684242248535, + "logps/chosen": -523.5897827148438, + "logps/rejected": -511.9645080566406, + "loss": 0.596, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.571276307106018, + "rewards/margins": 0.4954506456851959, + "rewards/rejected": -2.0667271614074707, + "step": 7010 + }, + { + "epoch": 0.92, + "eval_logits/chosen": -2.237297296524048, + "eval_logits/rejected": -2.24528169631958, + "eval_logps/chosen": -480.1894226074219, + "eval_logps/rejected": -492.4363708496094, + "eval_loss": 0.6001022458076477, + "eval_rewards/accuracies": 0.671500027179718, + "eval_rewards/chosen": -1.474400520324707, + "eval_rewards/margins": 0.43886318802833557, + "eval_rewards/rejected": -1.9132635593414307, + "eval_runtime": 196.8647, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.08, + "step": 7010 + }, + { + "epoch": 0.92, + "learning_rate": 9.995528704875635e-08, + "logits/chosen": -2.4749293327331543, + "logits/rejected": -2.5007224082946777, + "logps/chosen": -449.7769470214844, + "logps/rejected": -507.34991455078125, + "loss": 0.6293, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.5626052618026733, + "rewards/margins": 0.3416301906108856, + "rewards/rejected": -1.9042352437973022, + "step": 7020 + }, + { + "epoch": 0.92, + "eval_logits/chosen": -2.2375288009643555, + "eval_logits/rejected": -2.2456140518188477, + "eval_logps/chosen": -480.08721923828125, + "eval_logps/rejected": -492.32904052734375, + "eval_loss": 0.6000053286552429, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4733775854110718, + "eval_rewards/margins": 0.4388121962547302, + "eval_rewards/rejected": -1.9121898412704468, + "eval_runtime": 197.1161, + "eval_samples_per_second": 10.146, + "eval_steps_per_second": 5.073, + "step": 7020 + }, + { + "epoch": 0.92, + "learning_rate": 9.678280187073452e-08, + "logits/chosen": -2.376216173171997, + "logits/rejected": -2.4128124713897705, + "logps/chosen": -457.071533203125, + "logps/rejected": -477.45062255859375, + "loss": 0.5241, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.2783294916152954, + "rewards/margins": 0.6212576031684875, + "rewards/rejected": -1.8995869159698486, + "step": 7030 + }, + { + "epoch": 0.92, + "eval_logits/chosen": -2.237093687057495, + "eval_logits/rejected": -2.2451350688934326, + "eval_logps/chosen": -480.2182922363281, + "eval_logps/rejected": -492.4925537109375, + "eval_loss": 0.6000233888626099, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.4746882915496826, + "eval_rewards/margins": 0.43913722038269043, + "eval_rewards/rejected": -1.913825273513794, + "eval_runtime": 197.0562, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.075, + "step": 7030 + }, + { + "epoch": 0.92, + "learning_rate": 9.366048394063549e-08, + "logits/chosen": -2.531467914581299, + "logits/rejected": -2.4971137046813965, + "logps/chosen": -474.6029357910156, + "logps/rejected": -536.8944091796875, + "loss": 0.5432, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.343653678894043, + "rewards/margins": 0.6326344013214111, + "rewards/rejected": -1.976288080215454, + "step": 7040 + }, + { + "epoch": 0.92, + "eval_logits/chosen": -2.2370364665985107, + "eval_logits/rejected": -2.2448863983154297, + "eval_logps/chosen": -480.6302490234375, + "eval_logps/rejected": -492.96478271484375, + "eval_loss": 0.5998647809028625, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4788081645965576, + "eval_rewards/margins": 0.4397394359111786, + "eval_rewards/rejected": -1.9185476303100586, + "eval_runtime": 197.1021, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.074, + "step": 7040 + }, + { + "epoch": 0.92, + "learning_rate": 9.058839843696237e-08, + "logits/chosen": -2.4858269691467285, + "logits/rejected": -2.4405789375305176, + "logps/chosen": -480.81610107421875, + "logps/rejected": -499.29461669921875, + "loss": 0.5755, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.399253010749817, + "rewards/margins": 0.5345474481582642, + "rewards/rejected": -1.933800458908081, + "step": 7050 + }, + { + "epoch": 0.92, + "eval_logits/chosen": -2.2368223667144775, + "eval_logits/rejected": -2.244842767715454, + "eval_logps/chosen": -480.903564453125, + "eval_logps/rejected": -493.2392272949219, + "eval_loss": 0.6001678705215454, + "eval_rewards/accuracies": 0.671500027179718, + "eval_rewards/chosen": -1.4815417528152466, + "eval_rewards/margins": 0.439750075340271, + "eval_rewards/rejected": -1.9212918281555176, + "eval_runtime": 197.05, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 7050 + }, + { + "epoch": 0.92, + "learning_rate": 8.756660948961299e-08, + "logits/chosen": -2.4491777420043945, + "logits/rejected": -2.463347911834717, + "logps/chosen": -449.61077880859375, + "logps/rejected": -495.1546325683594, + "loss": 0.6344, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.45897376537323, + "rewards/margins": 0.3097809851169586, + "rewards/rejected": -1.7687549591064453, + "step": 7060 + }, + { + "epoch": 0.92, + "eval_logits/chosen": -2.236321210861206, + "eval_logits/rejected": -2.244218349456787, + "eval_logps/chosen": -481.0281066894531, + "eval_logps/rejected": -493.4416809082031, + "eval_loss": 0.5998026132583618, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.482786774635315, + "eval_rewards/margins": 0.4405299723148346, + "eval_rewards/rejected": -1.9233167171478271, + "eval_runtime": 197.0403, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 7060 + }, + { + "epoch": 0.93, + "learning_rate": 8.459518017854412e-08, + "logits/chosen": -2.436307907104492, + "logits/rejected": -2.4082484245300293, + "logps/chosen": -488.6837463378906, + "logps/rejected": -466.79510498046875, + "loss": 0.659, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.5326035022735596, + "rewards/margins": 0.22640573978424072, + "rewards/rejected": -1.7590093612670898, + "step": 7070 + }, + { + "epoch": 0.93, + "eval_logits/chosen": -2.23673677444458, + "eval_logits/rejected": -2.2447004318237305, + "eval_logps/chosen": -481.2086181640625, + "eval_logps/rejected": -493.5932312011719, + "eval_loss": 0.5999945998191833, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.484592318534851, + "eval_rewards/margins": 0.44023993611335754, + "eval_rewards/rejected": -1.9248321056365967, + "eval_runtime": 197.134, + "eval_samples_per_second": 10.145, + "eval_steps_per_second": 5.073, + "step": 7070 + }, + { + "epoch": 0.93, + "learning_rate": 8.167417253245213e-08, + "logits/chosen": -2.4186267852783203, + "logits/rejected": -2.365056037902832, + "logps/chosen": -457.72088623046875, + "logps/rejected": -458.7981872558594, + "loss": 0.5947, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4014866352081299, + "rewards/margins": 0.3693445324897766, + "rewards/rejected": -1.7708311080932617, + "step": 7080 + }, + { + "epoch": 0.93, + "eval_logits/chosen": -2.235661268234253, + "eval_logits/rejected": -2.2434422969818115, + "eval_logps/chosen": -481.47943115234375, + "eval_logps/rejected": -493.9435729980469, + "eval_loss": 0.5998866558074951, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.487300157546997, + "eval_rewards/margins": 0.44103503227233887, + "eval_rewards/rejected": -1.928335189819336, + "eval_runtime": 197.0791, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 7080 + }, + { + "epoch": 0.93, + "learning_rate": 7.880364752747948e-08, + "logits/chosen": -2.4743261337280273, + "logits/rejected": -2.464456558227539, + "logps/chosen": -449.85626220703125, + "logps/rejected": -484.47998046875, + "loss": 0.628, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.6029231548309326, + "rewards/margins": 0.3753909170627594, + "rewards/rejected": -1.9783141613006592, + "step": 7090 + }, + { + "epoch": 0.93, + "eval_logits/chosen": -2.2360057830810547, + "eval_logits/rejected": -2.2437164783477783, + "eval_logps/chosen": -481.5184020996094, + "eval_logps/rejected": -493.98126220703125, + "eval_loss": 0.5998888611793518, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.4876903295516968, + "eval_rewards/margins": 0.4410220980644226, + "eval_rewards/rejected": -1.9287123680114746, + "eval_runtime": 197.1071, + "eval_samples_per_second": 10.147, + "eval_steps_per_second": 5.073, + "step": 7090 + }, + { + "epoch": 0.93, + "learning_rate": 7.598366508594245e-08, + "logits/chosen": -2.3809356689453125, + "logits/rejected": -2.394191026687622, + "logps/chosen": -520.0443115234375, + "logps/rejected": -556.1658935546875, + "loss": 0.5261, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.521211862564087, + "rewards/margins": 0.6119467616081238, + "rewards/rejected": -2.1331584453582764, + "step": 7100 + }, + { + "epoch": 0.93, + "eval_logits/chosen": -2.2363462448120117, + "eval_logits/rejected": -2.2442967891693115, + "eval_logps/chosen": -481.386474609375, + "eval_logps/rejected": -493.82806396484375, + "eval_loss": 0.5999638438224792, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.4863704442977905, + "eval_rewards/margins": 0.44081002473831177, + "eval_rewards/rejected": -1.9271804094314575, + "eval_runtime": 197.3044, + "eval_samples_per_second": 10.137, + "eval_steps_per_second": 5.068, + "step": 7100 + }, + { + "epoch": 0.93, + "learning_rate": 7.32142840750788e-08, + "logits/chosen": -2.4369311332702637, + "logits/rejected": -2.396646499633789, + "logps/chosen": -509.541015625, + "logps/rejected": -515.9998779296875, + "loss": 0.5201, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.364022135734558, + "rewards/margins": 0.608812153339386, + "rewards/rejected": -1.9728343486785889, + "step": 7110 + }, + { + "epoch": 0.93, + "eval_logits/chosen": -2.236448287963867, + "eval_logits/rejected": -2.2443768978118896, + "eval_logps/chosen": -481.1351318359375, + "eval_logps/rejected": -493.5433654785156, + "eval_loss": 0.6000708937644958, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.4838569164276123, + "eval_rewards/margins": 0.44047674536705017, + "eval_rewards/rejected": -1.9243335723876953, + "eval_runtime": 197.1301, + "eval_samples_per_second": 10.146, + "eval_steps_per_second": 5.073, + "step": 7110 + }, + { + "epoch": 0.93, + "learning_rate": 7.049556230581872e-08, + "logits/chosen": -2.3801629543304443, + "logits/rejected": -2.3011727333068848, + "logps/chosen": -450.39837646484375, + "logps/rejected": -455.0599060058594, + "loss": 0.6504, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.612126111984253, + "rewards/margins": 0.34414222836494446, + "rewards/rejected": -1.9562686681747437, + "step": 7120 + }, + { + "epoch": 0.93, + "eval_logits/chosen": -2.236114740371704, + "eval_logits/rejected": -2.24397873878479, + "eval_logps/chosen": -481.0938720703125, + "eval_logps/rejected": -493.5276184082031, + "eval_loss": 0.6000164151191711, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.4834445714950562, + "eval_rewards/margins": 0.44073137640953064, + "eval_rewards/rejected": -1.9241758584976196, + "eval_runtime": 197.0056, + "eval_samples_per_second": 10.152, + "eval_steps_per_second": 5.076, + "step": 7120 + }, + { + "epoch": 0.93, + "learning_rate": 6.782755653158085e-08, + "logits/chosen": -2.495652437210083, + "logits/rejected": -2.4827919006347656, + "logps/chosen": -485.154296875, + "logps/rejected": -492.5819396972656, + "loss": 0.5956, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.4519439935684204, + "rewards/margins": 0.38830724358558655, + "rewards/rejected": -1.8402513265609741, + "step": 7130 + }, + { + "epoch": 0.93, + "eval_logits/chosen": -2.236171007156372, + "eval_logits/rejected": -2.2441015243530273, + "eval_logps/chosen": -480.94415283203125, + "eval_logps/rejected": -493.3447570800781, + "eval_loss": 0.6001508235931396, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.4819475412368774, + "eval_rewards/margins": 0.4403998851776123, + "eval_rewards/rejected": -1.9223475456237793, + "eval_runtime": 196.9017, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 7130 + }, + { + "epoch": 0.93, + "learning_rate": 6.521032244708375e-08, + "logits/chosen": -2.3476414680480957, + "logits/rejected": -2.3627238273620605, + "logps/chosen": -479.8387145996094, + "logps/rejected": -507.6990661621094, + "loss": 0.67, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.5138092041015625, + "rewards/margins": 0.3506276309490204, + "rewards/rejected": -1.8644367456436157, + "step": 7140 + }, + { + "epoch": 0.93, + "eval_logits/chosen": -2.2358126640319824, + "eval_logits/rejected": -2.2438180446624756, + "eval_logps/chosen": -480.76470947265625, + "eval_logps/rejected": -493.1570739746094, + "eval_loss": 0.6001284718513489, + "eval_rewards/accuracies": 0.6754999756813049, + "eval_rewards/chosen": -1.48015296459198, + "eval_rewards/margins": 0.4403176009654999, + "eval_rewards/rejected": -1.9204705953598022, + "eval_runtime": 196.9924, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.076, + "step": 7140 + }, + { + "epoch": 0.94, + "learning_rate": 6.264391468718628e-08, + "logits/chosen": -2.483029842376709, + "logits/rejected": -2.448090076446533, + "logps/chosen": -470.856201171875, + "logps/rejected": -495.6634216308594, + "loss": 0.5571, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.3123161792755127, + "rewards/margins": 0.5451647043228149, + "rewards/rejected": -1.857480764389038, + "step": 7150 + }, + { + "epoch": 0.94, + "eval_logits/chosen": -2.2361068725585938, + "eval_logits/rejected": -2.244180679321289, + "eval_logps/chosen": -480.6026306152344, + "eval_logps/rejected": -492.9624938964844, + "eval_loss": 0.6001822352409363, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.4785321950912476, + "eval_rewards/margins": 0.4399925172328949, + "eval_rewards/rejected": -1.9185247421264648, + "eval_runtime": 197.2989, + "eval_samples_per_second": 10.137, + "eval_steps_per_second": 5.068, + "step": 7150 + }, + { + "epoch": 0.94, + "learning_rate": 6.012838682574462e-08, + "logits/chosen": -2.557973861694336, + "logits/rejected": -2.5231175422668457, + "logps/chosen": -490.7843322753906, + "logps/rejected": -462.07733154296875, + "loss": 0.6067, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.563360571861267, + "rewards/margins": 0.4491299092769623, + "rewards/rejected": -2.0124905109405518, + "step": 7160 + }, + { + "epoch": 0.94, + "eval_logits/chosen": -2.23598575592041, + "eval_logits/rejected": -2.2441365718841553, + "eval_logps/chosen": -480.674072265625, + "eval_logps/rejected": -493.0469665527344, + "eval_loss": 0.6001749038696289, + "eval_rewards/accuracies": 0.6759999990463257, + "eval_rewards/chosen": -1.4792464971542358, + "eval_rewards/margins": 0.44012314081192017, + "eval_rewards/rejected": -1.9193694591522217, + "eval_runtime": 197.017, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.076, + "step": 7160 + }, + { + "epoch": 0.94, + "learning_rate": 5.766379137449624e-08, + "logits/chosen": -2.5023579597473145, + "logits/rejected": -2.476633310317993, + "logps/chosen": -426.63067626953125, + "logps/rejected": -493.447265625, + "loss": 0.567, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.3543007373809814, + "rewards/margins": 0.5005929470062256, + "rewards/rejected": -1.854893684387207, + "step": 7170 + }, + { + "epoch": 0.94, + "eval_logits/chosen": -2.236705780029297, + "eval_logits/rejected": -2.2447779178619385, + "eval_logps/chosen": -480.6815185546875, + "eval_logps/rejected": -493.0613708496094, + "eval_loss": 0.6001842021942139, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.4793212413787842, + "eval_rewards/margins": 0.44019201397895813, + "eval_rewards/rejected": -1.91951322555542, + "eval_runtime": 196.9448, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.078, + "step": 7170 + }, + { + "epoch": 0.94, + "learning_rate": 5.525017978196295e-08, + "logits/chosen": -2.523089647293091, + "logits/rejected": -2.468512773513794, + "logps/chosen": -509.1969299316406, + "logps/rejected": -495.98358154296875, + "loss": 0.5853, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.4794842004776, + "rewards/margins": 0.548646092414856, + "rewards/rejected": -2.028130292892456, + "step": 7180 + }, + { + "epoch": 0.94, + "eval_logits/chosen": -2.235506296157837, + "eval_logits/rejected": -2.2435786724090576, + "eval_logps/chosen": -480.6484069824219, + "eval_logps/rejected": -493.06866455078125, + "eval_loss": 0.5999549627304077, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4789897203445435, + "eval_rewards/margins": 0.44059687852859497, + "eval_rewards/rejected": -1.9195865392684937, + "eval_runtime": 196.8598, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 7180 + }, + { + "epoch": 0.94, + "learning_rate": 5.288760243237545e-08, + "logits/chosen": -2.4585039615631104, + "logits/rejected": -2.401052951812744, + "logps/chosen": -526.7545166015625, + "logps/rejected": -510.090087890625, + "loss": 0.5763, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.4466525316238403, + "rewards/margins": 0.5509330630302429, + "rewards/rejected": -1.9975858926773071, + "step": 7190 + }, + { + "epoch": 0.94, + "eval_logits/chosen": -2.235445976257324, + "eval_logits/rejected": -2.243375062942505, + "eval_logps/chosen": -480.7488098144531, + "eval_logps/rejected": -493.1495666503906, + "eval_loss": 0.6000896096229553, + "eval_rewards/accuracies": 0.675000011920929, + "eval_rewards/chosen": -1.4799941778182983, + "eval_rewards/margins": 0.44040152430534363, + "eval_rewards/rejected": -1.9203956127166748, + "eval_runtime": 197.0246, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.076, + "step": 7190 + }, + { + "epoch": 0.94, + "learning_rate": 5.0576108644623536e-08, + "logits/chosen": -2.3365859985351562, + "logits/rejected": -2.2916181087493896, + "logps/chosen": -525.1251220703125, + "logps/rejected": -482.224853515625, + "loss": 0.6433, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5160186290740967, + "rewards/margins": 0.3626102805137634, + "rewards/rejected": -1.8786289691925049, + "step": 7200 + }, + { + "epoch": 0.94, + "eval_logits/chosen": -2.235518455505371, + "eval_logits/rejected": -2.2436070442199707, + "eval_logps/chosen": -480.7719421386719, + "eval_logps/rejected": -493.13995361328125, + "eval_loss": 0.600199282169342, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.4802253246307373, + "eval_rewards/margins": 0.4400743544101715, + "eval_rewards/rejected": -1.9202996492385864, + "eval_runtime": 197.1412, + "eval_samples_per_second": 10.145, + "eval_steps_per_second": 5.073, + "step": 7200 + }, + { + "epoch": 0.94, + "learning_rate": 4.8315746671225296e-08, + "logits/chosen": -2.4270944595336914, + "logits/rejected": -2.376451015472412, + "logps/chosen": -497.7478942871094, + "logps/rejected": -515.0045776367188, + "loss": 0.5246, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.2914670705795288, + "rewards/margins": 0.5625424385070801, + "rewards/rejected": -1.8540096282958984, + "step": 7210 + }, + { + "epoch": 0.94, + "eval_logits/chosen": -2.2360446453094482, + "eval_logits/rejected": -2.2440760135650635, + "eval_logps/chosen": -480.86669921875, + "eval_logps/rejected": -493.27886962890625, + "eval_loss": 0.6001480221748352, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.4811723232269287, + "eval_rewards/margins": 0.4405162036418915, + "eval_rewards/rejected": -1.9216883182525635, + "eval_runtime": 196.9347, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 7210 + }, + { + "epoch": 0.94, + "learning_rate": 4.6106563697320695e-08, + "logits/chosen": -2.457677125930786, + "logits/rejected": -2.44500470161438, + "logps/chosen": -434.3646545410156, + "logps/rejected": -441.17852783203125, + "loss": 0.5472, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.2899720668792725, + "rewards/margins": 0.6157953143119812, + "rewards/rejected": -1.9057674407958984, + "step": 7220 + }, + { + "epoch": 0.94, + "eval_logits/chosen": -2.2354533672332764, + "eval_logits/rejected": -2.2435836791992188, + "eval_logps/chosen": -480.8843688964844, + "eval_logps/rejected": -493.26123046875, + "eval_loss": 0.6003187894821167, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.4813494682312012, + "eval_rewards/margins": 0.44016218185424805, + "eval_rewards/rejected": -1.9215115308761597, + "eval_runtime": 197.2506, + "eval_samples_per_second": 10.139, + "eval_steps_per_second": 5.07, + "step": 7220 + }, + { + "epoch": 0.95, + "learning_rate": 4.394860583968624e-08, + "logits/chosen": -2.489647150039673, + "logits/rejected": -2.498183250427246, + "logps/chosen": -397.114990234375, + "logps/rejected": -462.450927734375, + "loss": 0.6258, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -1.455381155014038, + "rewards/margins": 0.3950461447238922, + "rewards/rejected": -1.850427269935608, + "step": 7230 + }, + { + "epoch": 0.95, + "eval_logits/chosen": -2.2355403900146484, + "eval_logits/rejected": -2.2435503005981445, + "eval_logps/chosen": -480.9859313964844, + "eval_logps/rejected": -493.4081115722656, + "eval_loss": 0.6001339554786682, + "eval_rewards/accuracies": 0.671500027179718, + "eval_rewards/chosen": -1.4823654890060425, + "eval_rewards/margins": 0.4406152665615082, + "eval_rewards/rejected": -1.9229806661605835, + "eval_runtime": 196.8587, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 7230 + }, + { + "epoch": 0.95, + "learning_rate": 4.1841918145771874e-08, + "logits/chosen": -2.379164218902588, + "logits/rejected": -2.3571412563323975, + "logps/chosen": -482.9966735839844, + "logps/rejected": -512.3907470703125, + "loss": 0.5519, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.4066792726516724, + "rewards/margins": 0.5344252586364746, + "rewards/rejected": -1.941104531288147, + "step": 7240 + }, + { + "epoch": 0.95, + "eval_logits/chosen": -2.2356574535369873, + "eval_logits/rejected": -2.243659496307373, + "eval_logps/chosen": -480.8147888183594, + "eval_logps/rejected": -493.2166748046875, + "eval_loss": 0.600143313407898, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.4806538820266724, + "eval_rewards/margins": 0.4404126703739166, + "eval_rewards/rejected": -1.921066403388977, + "eval_runtime": 197.1276, + "eval_samples_per_second": 10.146, + "eval_steps_per_second": 5.073, + "step": 7240 + }, + { + "epoch": 0.95, + "learning_rate": 3.978654459276088e-08, + "logits/chosen": -2.5380361080169678, + "logits/rejected": -2.518141269683838, + "logps/chosen": -526.7445068359375, + "logps/rejected": -505.240234375, + "loss": 0.5803, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.4389846324920654, + "rewards/margins": 0.531572163105011, + "rewards/rejected": -1.9705572128295898, + "step": 7250 + }, + { + "epoch": 0.95, + "eval_logits/chosen": -2.235908269882202, + "eval_logits/rejected": -2.2438931465148926, + "eval_logps/chosen": -480.80810546875, + "eval_logps/rejected": -493.21368408203125, + "eval_loss": 0.600168764591217, + "eval_rewards/accuracies": 0.6754999756813049, + "eval_rewards/chosen": -1.4805866479873657, + "eval_rewards/margins": 0.4404502213001251, + "eval_rewards/rejected": -1.921036958694458, + "eval_runtime": 197.0241, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.076, + "step": 7250 + }, + { + "epoch": 0.95, + "learning_rate": 3.778252808665284e-08, + "logits/chosen": -2.5763096809387207, + "logits/rejected": -2.581481456756592, + "logps/chosen": -542.89453125, + "logps/rejected": -504.26971435546875, + "loss": 0.566, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.4922627210617065, + "rewards/margins": 0.5086614489555359, + "rewards/rejected": -2.0009243488311768, + "step": 7260 + }, + { + "epoch": 0.95, + "eval_logits/chosen": -2.235844612121582, + "eval_logits/rejected": -2.2437777519226074, + "eval_logps/chosen": -480.9935607910156, + "eval_logps/rejected": -493.4316101074219, + "eval_loss": 0.6000894904136658, + "eval_rewards/accuracies": 0.6759999990463257, + "eval_rewards/chosen": -1.4824414253234863, + "eval_rewards/margins": 0.4407746493816376, + "eval_rewards/rejected": -1.9232161045074463, + "eval_runtime": 196.9012, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.079, + "step": 7260 + }, + { + "epoch": 0.95, + "learning_rate": 3.5829910461366023e-08, + "logits/chosen": -2.401991367340088, + "logits/rejected": -2.417039632797241, + "logps/chosen": -438.27191162109375, + "logps/rejected": -485.9276428222656, + "loss": 0.5685, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.3814996480941772, + "rewards/margins": 0.601586639881134, + "rewards/rejected": -1.983086347579956, + "step": 7270 + }, + { + "epoch": 0.95, + "eval_logits/chosen": -2.234957218170166, + "eval_logits/rejected": -2.242936611175537, + "eval_logps/chosen": -481.03302001953125, + "eval_logps/rejected": -493.4700622558594, + "eval_loss": 0.6001612544059753, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.482836127281189, + "eval_rewards/margins": 0.44076380133628845, + "eval_rewards/rejected": -1.9235999584197998, + "eval_runtime": 196.9569, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 7270 + }, + { + "epoch": 0.95, + "learning_rate": 3.39287324778656e-08, + "logits/chosen": -2.5495078563690186, + "logits/rejected": -2.5458648204803467, + "logps/chosen": -551.0210571289062, + "logps/rejected": -540.4073486328125, + "loss": 0.6324, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.5627120733261108, + "rewards/margins": 0.4206581115722656, + "rewards/rejected": -1.9833701848983765, + "step": 7280 + }, + { + "epoch": 0.95, + "eval_logits/chosen": -2.2355172634124756, + "eval_logits/rejected": -2.2434380054473877, + "eval_logps/chosen": -481.1297607421875, + "eval_logps/rejected": -493.6228332519531, + "eval_loss": 0.6000157594680786, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4838035106658936, + "eval_rewards/margins": 0.44132480025291443, + "eval_rewards/rejected": -1.9251282215118408, + "eval_runtime": 196.9853, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 7280 + }, + { + "epoch": 0.95, + "learning_rate": 3.207903382331262e-08, + "logits/chosen": -2.4150776863098145, + "logits/rejected": -2.4447875022888184, + "logps/chosen": -505.7635192871094, + "logps/rejected": -486.01513671875, + "loss": 0.6182, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -1.3768532276153564, + "rewards/margins": 0.3623526394367218, + "rewards/rejected": -1.7392059564590454, + "step": 7290 + }, + { + "epoch": 0.95, + "eval_logits/chosen": -2.235156297683716, + "eval_logits/rejected": -2.2431724071502686, + "eval_logps/chosen": -481.0028076171875, + "eval_logps/rejected": -493.4009704589844, + "eval_loss": 0.600363552570343, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.482533574104309, + "eval_rewards/margins": 0.4403752386569977, + "eval_rewards/rejected": -1.9229090213775635, + "eval_runtime": 197.2199, + "eval_samples_per_second": 10.141, + "eval_steps_per_second": 5.07, + "step": 7290 + }, + { + "epoch": 0.96, + "learning_rate": 3.028085311023443e-08, + "logits/chosen": -2.3501362800598145, + "logits/rejected": -2.3338279724121094, + "logps/chosen": -482.4646911621094, + "logps/rejected": -484.25885009765625, + "loss": 0.5617, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.3721768856048584, + "rewards/margins": 0.5388258695602417, + "rewards/rejected": -1.9110028743743896, + "step": 7300 + }, + { + "epoch": 0.96, + "eval_logits/chosen": -2.2347323894500732, + "eval_logits/rejected": -2.242525339126587, + "eval_logps/chosen": -481.16748046875, + "eval_logps/rejected": -493.6541442871094, + "eval_loss": 0.6000457406044006, + "eval_rewards/accuracies": 0.675000011920929, + "eval_rewards/chosen": -1.4841809272766113, + "eval_rewards/margins": 0.4412601888179779, + "eval_rewards/rejected": -1.925441026687622, + "eval_runtime": 197.1805, + "eval_samples_per_second": 10.143, + "eval_steps_per_second": 5.071, + "step": 7300 + }, + { + "epoch": 0.96, + "learning_rate": 2.8534227875720576e-08, + "logits/chosen": -2.493821620941162, + "logits/rejected": -2.4801414012908936, + "logps/chosen": -466.4181213378906, + "logps/rejected": -507.5314025878906, + "loss": 0.5804, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.4337350130081177, + "rewards/margins": 0.4854932427406311, + "rewards/rejected": -1.9192283153533936, + "step": 7310 + }, + { + "epoch": 0.96, + "eval_logits/chosen": -2.2344577312469482, + "eval_logits/rejected": -2.2422938346862793, + "eval_logps/chosen": -481.3323669433594, + "eval_logps/rejected": -493.8113708496094, + "eval_loss": 0.600138783454895, + "eval_rewards/accuracies": 0.6754999756813049, + "eval_rewards/chosen": -1.4858298301696777, + "eval_rewards/margins": 0.44118383526802063, + "eval_rewards/rejected": -1.927013635635376, + "eval_runtime": 196.9791, + "eval_samples_per_second": 10.153, + "eval_steps_per_second": 5.077, + "step": 7310 + }, + { + "epoch": 0.96, + "learning_rate": 2.683919458063705e-08, + "logits/chosen": -2.486636161804199, + "logits/rejected": -2.414386749267578, + "logps/chosen": -405.9773864746094, + "logps/rejected": -394.66607666015625, + "loss": 0.5918, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.4356229305267334, + "rewards/margins": 0.4635559916496277, + "rewards/rejected": -1.8991791009902954, + "step": 7320 + }, + { + "epoch": 0.96, + "eval_logits/chosen": -2.2344415187835693, + "eval_logits/rejected": -2.242302656173706, + "eval_logps/chosen": -481.2685546875, + "eval_logps/rejected": -493.75445556640625, + "eval_loss": 0.6001518368721008, + "eval_rewards/accuracies": 0.6754999756813049, + "eval_rewards/chosen": -1.4851912260055542, + "eval_rewards/margins": 0.4412528872489929, + "eval_rewards/rejected": -1.9264440536499023, + "eval_runtime": 197.6057, + "eval_samples_per_second": 10.121, + "eval_steps_per_second": 5.061, + "step": 7320 + }, + { + "epoch": 0.96, + "learning_rate": 2.5195788608866345e-08, + "logits/chosen": -2.381263017654419, + "logits/rejected": -2.317131280899048, + "logps/chosen": -571.0789794921875, + "logps/rejected": -538.0128784179688, + "loss": 0.5686, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.586227297782898, + "rewards/margins": 0.5877612233161926, + "rewards/rejected": -2.1739885807037354, + "step": 7330 + }, + { + "epoch": 0.96, + "eval_logits/chosen": -2.2345898151397705, + "eval_logits/rejected": -2.242624282836914, + "eval_logps/chosen": -481.3023986816406, + "eval_logps/rejected": -493.7621154785156, + "eval_loss": 0.6001543998718262, + "eval_rewards/accuracies": 0.6754999756813049, + "eval_rewards/chosen": -1.4855300188064575, + "eval_rewards/margins": 0.4409913420677185, + "eval_rewards/rejected": -1.9265215396881104, + "eval_runtime": 197.0845, + "eval_samples_per_second": 10.148, + "eval_steps_per_second": 5.074, + "step": 7330 + }, + { + "epoch": 0.96, + "learning_rate": 2.3604044266569426e-08, + "logits/chosen": -2.4356443881988525, + "logits/rejected": -2.3660850524902344, + "logps/chosen": -499.91949462890625, + "logps/rejected": -484.0068359375, + "loss": 0.6209, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.5768749713897705, + "rewards/margins": 0.38860636949539185, + "rewards/rejected": -1.9654814004898071, + "step": 7340 + }, + { + "epoch": 0.96, + "eval_logits/chosen": -2.2345218658447266, + "eval_logits/rejected": -2.2423171997070312, + "eval_logps/chosen": -481.2668762207031, + "eval_logps/rejected": -493.7497863769531, + "eval_loss": 0.6001455783843994, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.485174536705017, + "eval_rewards/margins": 0.4412229061126709, + "eval_rewards/rejected": -1.926397442817688, + "eval_runtime": 196.7867, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 5.082, + "step": 7340 + }, + { + "epoch": 0.96, + "learning_rate": 2.2063994781468256e-08, + "logits/chosen": -2.3938355445861816, + "logits/rejected": -2.4135689735412598, + "logps/chosen": -474.46148681640625, + "logps/rejected": -478.35601806640625, + "loss": 0.6103, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.480282187461853, + "rewards/margins": 0.47738590836524963, + "rewards/rejected": -1.9576680660247803, + "step": 7350 + }, + { + "epoch": 0.96, + "eval_logits/chosen": -2.234881639480591, + "eval_logits/rejected": -2.2427618503570557, + "eval_logps/chosen": -481.1435852050781, + "eval_logps/rejected": -493.56268310546875, + "eval_loss": 0.600322961807251, + "eval_rewards/accuracies": 0.6765000224113464, + "eval_rewards/chosen": -1.4839422702789307, + "eval_rewards/margins": 0.44058436155319214, + "eval_rewards/rejected": -1.9245266914367676, + "eval_runtime": 196.7921, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 5.082, + "step": 7350 + }, + { + "epoch": 0.96, + "learning_rate": 2.057567230215246e-08, + "logits/chosen": -2.5192952156066895, + "logits/rejected": -2.5412585735321045, + "logps/chosen": -486.6336364746094, + "logps/rejected": -526.0662841796875, + "loss": 0.6426, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.5905240774154663, + "rewards/margins": 0.31872352957725525, + "rewards/rejected": -1.9092477560043335, + "step": 7360 + }, + { + "epoch": 0.96, + "eval_logits/chosen": -2.2351341247558594, + "eval_logits/rejected": -2.2430951595306396, + "eval_logps/chosen": -481.04461669921875, + "eval_logps/rejected": -493.49676513671875, + "eval_loss": 0.6000579595565796, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4829521179199219, + "eval_rewards/margins": 0.44091513752937317, + "eval_rewards/rejected": -1.9238673448562622, + "eval_runtime": 196.971, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 7360 + }, + { + "epoch": 0.96, + "learning_rate": 1.9139107897409303e-08, + "logits/chosen": -2.3607892990112305, + "logits/rejected": -2.328470468521118, + "logps/chosen": -502.33673095703125, + "logps/rejected": -484.7545471191406, + "loss": 0.5438, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.4793694019317627, + "rewards/margins": 0.5975691080093384, + "rewards/rejected": -2.0769386291503906, + "step": 7370 + }, + { + "epoch": 0.96, + "eval_logits/chosen": -2.2346956729888916, + "eval_logits/rejected": -2.2427361011505127, + "eval_logps/chosen": -481.18170166015625, + "eval_logps/rejected": -493.6324768066406, + "eval_loss": 0.600149393081665, + "eval_rewards/accuracies": 0.671999990940094, + "eval_rewards/chosen": -1.4843229055404663, + "eval_rewards/margins": 0.4409013092517853, + "eval_rewards/rejected": -1.9252241849899292, + "eval_runtime": 196.8729, + "eval_samples_per_second": 10.159, + "eval_steps_per_second": 5.079, + "step": 7370 + }, + { + "epoch": 0.97, + "learning_rate": 1.7754331555573656e-08, + "logits/chosen": -2.5656070709228516, + "logits/rejected": -2.546877384185791, + "logps/chosen": -496.11834716796875, + "logps/rejected": -562.813232421875, + "loss": 0.6082, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.4974877834320068, + "rewards/margins": 0.4004742503166199, + "rewards/rejected": -1.897962212562561, + "step": 7380 + }, + { + "epoch": 0.97, + "eval_logits/chosen": -2.235090970993042, + "eval_logits/rejected": -2.2430014610290527, + "eval_logps/chosen": -480.996826171875, + "eval_logps/rejected": -493.45916748046875, + "eval_loss": 0.6000385880470276, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4824742078781128, + "eval_rewards/margins": 0.44101738929748535, + "eval_rewards/rejected": -1.9234915971755981, + "eval_runtime": 196.9341, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 7380 + }, + { + "epoch": 0.97, + "learning_rate": 1.642137218390294e-08, + "logits/chosen": -2.5074877738952637, + "logits/rejected": -2.4454264640808105, + "logps/chosen": -510.3954162597656, + "logps/rejected": -490.68426513671875, + "loss": 0.6119, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.5543279647827148, + "rewards/margins": 0.4863724708557129, + "rewards/rejected": -2.0407004356384277, + "step": 7390 + }, + { + "epoch": 0.97, + "eval_logits/chosen": -2.235023260116577, + "eval_logits/rejected": -2.2430419921875, + "eval_logps/chosen": -480.9540710449219, + "eval_logps/rejected": -493.37200927734375, + "eval_loss": 0.6001395583152771, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.4820466041564941, + "eval_rewards/margins": 0.44057348370552063, + "eval_rewards/rejected": -1.9226198196411133, + "eval_runtime": 197.0167, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.076, + "step": 7390 + }, + { + "epoch": 0.97, + "learning_rate": 1.514025760797344e-08, + "logits/chosen": -2.5765981674194336, + "logits/rejected": -2.5297365188598633, + "logps/chosen": -520.4789428710938, + "logps/rejected": -500.82781982421875, + "loss": 0.5542, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.3169505596160889, + "rewards/margins": 0.6009668111801147, + "rewards/rejected": -1.917917251586914, + "step": 7400 + }, + { + "epoch": 0.97, + "eval_logits/chosen": -2.2352027893066406, + "eval_logits/rejected": -2.2430360317230225, + "eval_logps/chosen": -480.98291015625, + "eval_logps/rejected": -493.39874267578125, + "eval_loss": 0.6002033948898315, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4823347330093384, + "eval_rewards/margins": 0.44055286049842834, + "eval_rewards/rejected": -1.9228876829147339, + "eval_runtime": 196.9532, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.077, + "step": 7400 + }, + { + "epoch": 0.97, + "learning_rate": 1.3911014571098835e-08, + "logits/chosen": -2.4495015144348145, + "logits/rejected": -2.454916477203369, + "logps/chosen": -452.8680725097656, + "logps/rejected": -499.41082763671875, + "loss": 0.6158, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.4365514516830444, + "rewards/margins": 0.3518769443035126, + "rewards/rejected": -1.7884283065795898, + "step": 7410 + }, + { + "epoch": 0.97, + "eval_logits/chosen": -2.2354750633239746, + "eval_logits/rejected": -2.2434208393096924, + "eval_logps/chosen": -480.9961853027344, + "eval_logps/rejected": -493.46746826171875, + "eval_loss": 0.5999510288238525, + "eval_rewards/accuracies": 0.6754999756813049, + "eval_rewards/chosen": -1.4824678897857666, + "eval_rewards/margins": 0.4411066174507141, + "eval_rewards/rejected": -1.923574686050415, + "eval_runtime": 197.1394, + "eval_samples_per_second": 10.145, + "eval_steps_per_second": 5.073, + "step": 7410 + }, + { + "epoch": 0.97, + "learning_rate": 1.2733668733773685e-08, + "logits/chosen": -2.4694085121154785, + "logits/rejected": -2.442884922027588, + "logps/chosen": -470.5113220214844, + "logps/rejected": -481.81854248046875, + "loss": 0.5374, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.4029333591461182, + "rewards/margins": 0.563439667224884, + "rewards/rejected": -1.9663728475570679, + "step": 7420 + }, + { + "epoch": 0.97, + "eval_logits/chosen": -2.2354135513305664, + "eval_logits/rejected": -2.2434794902801514, + "eval_logps/chosen": -481.0262145996094, + "eval_logps/rejected": -493.44000244140625, + "eval_loss": 0.6002059578895569, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4827677011489868, + "eval_rewards/margins": 0.44053229689598083, + "eval_rewards/rejected": -1.923299789428711, + "eval_runtime": 197.0352, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 7420 + }, + { + "epoch": 0.97, + "learning_rate": 1.160824467313526e-08, + "logits/chosen": -2.4775195121765137, + "logits/rejected": -2.447704792022705, + "logps/chosen": -536.0025634765625, + "logps/rejected": -557.3277587890625, + "loss": 0.5666, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.443741798400879, + "rewards/margins": 0.5521557331085205, + "rewards/rejected": -1.9958975315093994, + "step": 7430 + }, + { + "epoch": 0.97, + "eval_logits/chosen": -2.2347254753112793, + "eval_logits/rejected": -2.242621898651123, + "eval_logps/chosen": -481.0495910644531, + "eval_logps/rejected": -493.4919738769531, + "eval_loss": 0.600189745426178, + "eval_rewards/accuracies": 0.671999990940094, + "eval_rewards/chosen": -1.4830018281936646, + "eval_rewards/margins": 0.44081735610961914, + "eval_rewards/rejected": -1.9238191843032837, + "eval_runtime": 197.1193, + "eval_samples_per_second": 10.146, + "eval_steps_per_second": 5.073, + "step": 7430 + }, + { + "epoch": 0.97, + "learning_rate": 1.0534765882453113e-08, + "logits/chosen": -2.5495553016662598, + "logits/rejected": -2.5306897163391113, + "logps/chosen": -444.939453125, + "logps/rejected": -473.026123046875, + "loss": 0.5652, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.3965262174606323, + "rewards/margins": 0.45321035385131836, + "rewards/rejected": -1.8497365713119507, + "step": 7440 + }, + { + "epoch": 0.97, + "eval_logits/chosen": -2.2343382835388184, + "eval_logits/rejected": -2.2423436641693115, + "eval_logps/chosen": -480.95867919921875, + "eval_logps/rejected": -493.39239501953125, + "eval_loss": 0.6001228094100952, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.4820924997329712, + "eval_rewards/margins": 0.4407311975955963, + "eval_rewards/rejected": -1.9228236675262451, + "eval_runtime": 197.1931, + "eval_samples_per_second": 10.142, + "eval_steps_per_second": 5.071, + "step": 7440 + }, + { + "epoch": 0.97, + "learning_rate": 9.513254770636138e-09, + "logits/chosen": -2.504429340362549, + "logits/rejected": -2.4770166873931885, + "logps/chosen": -543.8363037109375, + "logps/rejected": -550.0518188476562, + "loss": 0.6496, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.6499207019805908, + "rewards/margins": 0.3239971101284027, + "rewards/rejected": -1.9739177227020264, + "step": 7450 + }, + { + "epoch": 0.97, + "eval_logits/chosen": -2.2349491119384766, + "eval_logits/rejected": -2.242962598800659, + "eval_logps/chosen": -480.9340515136719, + "eval_logps/rejected": -493.3950500488281, + "eval_loss": 0.6000087857246399, + "eval_rewards/accuracies": 0.6740000247955322, + "eval_rewards/chosen": -1.4818464517593384, + "eval_rewards/margins": 0.4410039186477661, + "eval_rewards/rejected": -1.922850489616394, + "eval_runtime": 196.8291, + "eval_samples_per_second": 10.161, + "eval_steps_per_second": 5.081, + "step": 7450 + }, + { + "epoch": 0.98, + "learning_rate": 8.543732661767113e-09, + "logits/chosen": -2.437833786010742, + "logits/rejected": -2.4614272117614746, + "logps/chosen": -493.88262939453125, + "logps/rejected": -533.1986083984375, + "loss": 0.6299, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4723308086395264, + "rewards/margins": 0.3506353795528412, + "rewards/rejected": -1.8229663372039795, + "step": 7460 + }, + { + "epoch": 0.98, + "eval_logits/chosen": -2.234743356704712, + "eval_logits/rejected": -2.2426867485046387, + "eval_logps/chosen": -481.0391845703125, + "eval_logps/rejected": -493.4858093261719, + "eval_loss": 0.5999827980995178, + "eval_rewards/accuracies": 0.671500027179718, + "eval_rewards/chosen": -1.4828983545303345, + "eval_rewards/margins": 0.44085952639579773, + "eval_rewards/rejected": -1.923757791519165, + "eval_runtime": 197.1407, + "eval_samples_per_second": 10.145, + "eval_steps_per_second": 5.073, + "step": 7460 + }, + { + "epoch": 0.98, + "learning_rate": 7.626219794655553e-09, + "logits/chosen": -2.424541473388672, + "logits/rejected": -2.410937547683716, + "logps/chosen": -449.8960876464844, + "logps/rejected": -487.70965576171875, + "loss": 0.5913, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.3779528141021729, + "rewards/margins": 0.4145236015319824, + "rewards/rejected": -1.7924764156341553, + "step": 7470 + }, + { + "epoch": 0.98, + "eval_logits/chosen": -2.2347500324249268, + "eval_logits/rejected": -2.2425975799560547, + "eval_logps/chosen": -481.0333251953125, + "eval_logps/rejected": -493.48687744140625, + "eval_loss": 0.6000844836235046, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -1.4828383922576904, + "eval_rewards/margins": 0.44092994928359985, + "eval_rewards/rejected": -1.9237682819366455, + "eval_runtime": 197.0357, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 7470 + }, + { + "epoch": 0.98, + "learning_rate": 6.7607353224163896e-09, + "logits/chosen": -2.494070053100586, + "logits/rejected": -2.460822582244873, + "logps/chosen": -476.69012451171875, + "logps/rejected": -475.34375, + "loss": 0.5809, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.3975374698638916, + "rewards/margins": 0.47304433584213257, + "rewards/rejected": -1.870581865310669, + "step": 7480 + }, + { + "epoch": 0.98, + "eval_logits/chosen": -2.235112190246582, + "eval_logits/rejected": -2.243116855621338, + "eval_logps/chosen": -481.0243835449219, + "eval_logps/rejected": -493.4410705566406, + "eval_loss": 0.6001359224319458, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.482749342918396, + "eval_rewards/margins": 0.4405609965324402, + "eval_rewards/rejected": -1.923310399055481, + "eval_runtime": 197.066, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.074, + "step": 7480 + }, + { + "epoch": 0.98, + "learning_rate": 5.947297312070554e-09, + "logits/chosen": -2.3596110343933105, + "logits/rejected": -2.344242811203003, + "logps/chosen": -517.79931640625, + "logps/rejected": -497.13067626953125, + "loss": 0.522, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.4211236238479614, + "rewards/margins": 0.6752224564552307, + "rewards/rejected": -2.096346139907837, + "step": 7490 + }, + { + "epoch": 0.98, + "eval_logits/chosen": -2.2345526218414307, + "eval_logits/rejected": -2.2424867153167725, + "eval_logps/chosen": -481.04962158203125, + "eval_logps/rejected": -493.4761657714844, + "eval_loss": 0.6002518534660339, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.483001947402954, + "eval_rewards/margins": 0.4406592547893524, + "eval_rewards/rejected": -1.923661231994629, + "eval_runtime": 197.026, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 7490 + }, + { + "epoch": 0.98, + "learning_rate": 5.185922744166128e-09, + "logits/chosen": -2.4216926097869873, + "logits/rejected": -2.4630672931671143, + "logps/chosen": -484.6437072753906, + "logps/rejected": -527.0128784179688, + "loss": 0.4985, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.3571288585662842, + "rewards/margins": 0.7009686827659607, + "rewards/rejected": -2.0580973625183105, + "step": 7500 + }, + { + "epoch": 0.98, + "eval_logits/chosen": -2.2345950603485107, + "eval_logits/rejected": -2.2425150871276855, + "eval_logps/chosen": -481.0412292480469, + "eval_logps/rejected": -493.51708984375, + "eval_loss": 0.6000384092330933, + "eval_rewards/accuracies": 0.6754999756813049, + "eval_rewards/chosen": -1.4829176664352417, + "eval_rewards/margins": 0.44115301966667175, + "eval_rewards/rejected": -1.924070954322815, + "eval_runtime": 196.6456, + "eval_samples_per_second": 10.171, + "eval_steps_per_second": 5.085, + "step": 7500 + }, + { + "epoch": 0.98, + "learning_rate": 4.476627512425558e-09, + "logits/chosen": -2.4267430305480957, + "logits/rejected": -2.4429757595062256, + "logps/chosen": -481.5536193847656, + "logps/rejected": -499.205078125, + "loss": 0.5986, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.4090001583099365, + "rewards/margins": 0.3903266489505768, + "rewards/rejected": -1.7993266582489014, + "step": 7510 + }, + { + "epoch": 0.98, + "eval_logits/chosen": -2.234553813934326, + "eval_logits/rejected": -2.2424240112304688, + "eval_logps/chosen": -481.0640869140625, + "eval_logps/rejected": -493.5656433105469, + "eval_loss": 0.5998890399932861, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.4831470251083374, + "eval_rewards/margins": 0.4414092004299164, + "eval_rewards/rejected": -1.9245561361312866, + "eval_runtime": 196.947, + "eval_samples_per_second": 10.155, + "eval_steps_per_second": 5.078, + "step": 7510 + }, + { + "epoch": 0.98, + "learning_rate": 3.819426423412875e-09, + "logits/chosen": -2.4812378883361816, + "logits/rejected": -2.4551587104797363, + "logps/chosen": -514.4886474609375, + "logps/rejected": -527.8831787109375, + "loss": 0.5748, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.5342094898223877, + "rewards/margins": 0.5676885843276978, + "rewards/rejected": -2.101898193359375, + "step": 7520 + }, + { + "epoch": 0.98, + "eval_logits/chosen": -2.235311985015869, + "eval_logits/rejected": -2.243098497390747, + "eval_logps/chosen": -481.0576171875, + "eval_logps/rejected": -493.5203857421875, + "eval_loss": 0.6000725030899048, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4830819368362427, + "eval_rewards/margins": 0.44102197885513306, + "eval_rewards/rejected": -1.9241037368774414, + "eval_runtime": 197.0258, + "eval_samples_per_second": 10.151, + "eval_steps_per_second": 5.075, + "step": 7520 + }, + { + "epoch": 0.99, + "learning_rate": 3.2143331962256053e-09, + "logits/chosen": -2.4706759452819824, + "logits/rejected": -2.4405550956726074, + "logps/chosen": -497.95977783203125, + "logps/rejected": -523.1436157226562, + "loss": 0.6262, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1.4370882511138916, + "rewards/margins": 0.4015069901943207, + "rewards/rejected": -1.8385951519012451, + "step": 7530 + }, + { + "epoch": 0.99, + "eval_logits/chosen": -2.234773874282837, + "eval_logits/rejected": -2.242658853530884, + "eval_logps/chosen": -481.1151428222656, + "eval_logps/rejected": -493.6004333496094, + "eval_loss": 0.5999842286109924, + "eval_rewards/accuracies": 0.675000011920929, + "eval_rewards/chosen": -1.4836574792861938, + "eval_rewards/margins": 0.4412464201450348, + "eval_rewards/rejected": -1.9249041080474854, + "eval_runtime": 197.1165, + "eval_samples_per_second": 10.146, + "eval_steps_per_second": 5.073, + "step": 7530 + }, + { + "epoch": 0.99, + "learning_rate": 2.6613604622066635e-09, + "logits/chosen": -2.541171073913574, + "logits/rejected": -2.5328097343444824, + "logps/chosen": -455.81689453125, + "logps/rejected": -507.7245178222656, + "loss": 0.5998, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.3390320539474487, + "rewards/margins": 0.43062907457351685, + "rewards/rejected": -1.7696613073349, + "step": 7540 + }, + { + "epoch": 0.99, + "eval_logits/chosen": -2.235093116760254, + "eval_logits/rejected": -2.2430434226989746, + "eval_logps/chosen": -481.0205993652344, + "eval_logps/rejected": -493.4674377441406, + "eval_loss": 0.6002621054649353, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.4827115535736084, + "eval_rewards/margins": 0.44086259603500366, + "eval_rewards/rejected": -1.9235742092132568, + "eval_runtime": 197.3737, + "eval_samples_per_second": 10.133, + "eval_steps_per_second": 5.067, + "step": 7540 + }, + { + "epoch": 0.99, + "learning_rate": 2.1605197646826228e-09, + "logits/chosen": -2.346137523651123, + "logits/rejected": -2.3369574546813965, + "logps/chosen": -441.90081787109375, + "logps/rejected": -449.46826171875, + "loss": 0.5577, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.3383251428604126, + "rewards/margins": 0.5153234601020813, + "rewards/rejected": -1.8536484241485596, + "step": 7550 + }, + { + "epoch": 0.99, + "eval_logits/chosen": -2.2347373962402344, + "eval_logits/rejected": -2.2426373958587646, + "eval_logps/chosen": -481.1553955078125, + "eval_logps/rejected": -493.6079406738281, + "eval_loss": 0.6001291275024414, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.4840598106384277, + "eval_rewards/margins": 0.44091925024986267, + "eval_rewards/rejected": -1.9249789714813232, + "eval_runtime": 196.9374, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 7550 + }, + { + "epoch": 0.99, + "learning_rate": 1.711821558721405e-09, + "logits/chosen": -2.4623870849609375, + "logits/rejected": -2.449855327606201, + "logps/chosen": -520.6915283203125, + "logps/rejected": -494.63409423828125, + "loss": 0.5462, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.4562956094741821, + "rewards/margins": 0.49862104654312134, + "rewards/rejected": -1.9549165964126587, + "step": 7560 + }, + { + "epoch": 0.99, + "eval_logits/chosen": -2.23518705368042, + "eval_logits/rejected": -2.2431421279907227, + "eval_logps/chosen": -481.0464172363281, + "eval_logps/rejected": -493.4737854003906, + "eval_loss": 0.6001744270324707, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4829697608947754, + "eval_rewards/margins": 0.44066765904426575, + "eval_rewards/rejected": -1.9236375093460083, + "eval_runtime": 197.0436, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 7560 + }, + { + "epoch": 0.99, + "learning_rate": 1.3152752109149569e-09, + "logits/chosen": -2.4634616374969482, + "logits/rejected": -2.4458584785461426, + "logps/chosen": -497.2911071777344, + "logps/rejected": -506.8922424316406, + "loss": 0.6308, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.5672760009765625, + "rewards/margins": 0.35916125774383545, + "rewards/rejected": -1.9264371395111084, + "step": 7570 + }, + { + "epoch": 0.99, + "eval_logits/chosen": -2.2348592281341553, + "eval_logits/rejected": -2.2428770065307617, + "eval_logps/chosen": -480.9827880859375, + "eval_logps/rejected": -493.4491882324219, + "eval_loss": 0.6000152230262756, + "eval_rewards/accuracies": 0.671999990940094, + "eval_rewards/chosen": -1.4823333024978638, + "eval_rewards/margins": 0.44105857610702515, + "eval_rewards/rejected": -1.9233920574188232, + "eval_runtime": 197.0461, + "eval_samples_per_second": 10.15, + "eval_steps_per_second": 5.075, + "step": 7570 + }, + { + "epoch": 0.99, + "learning_rate": 9.708889991830173e-10, + "logits/chosen": -2.4818179607391357, + "logits/rejected": -2.464740037918091, + "logps/chosen": -490.53399658203125, + "logps/rejected": -458.87774658203125, + "loss": 0.5767, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1.4643405675888062, + "rewards/margins": 0.48608309030532837, + "rewards/rejected": -1.9504238367080688, + "step": 7580 + }, + { + "epoch": 0.99, + "eval_logits/chosen": -2.235227346420288, + "eval_logits/rejected": -2.2431695461273193, + "eval_logps/chosen": -480.998779296875, + "eval_logps/rejected": -493.45257568359375, + "eval_loss": 0.6000584959983826, + "eval_rewards/accuracies": 0.6759999990463257, + "eval_rewards/chosen": -1.48249351978302, + "eval_rewards/margins": 0.4409320652484894, + "eval_rewards/rejected": -1.9234256744384766, + "eval_runtime": 196.8116, + "eval_samples_per_second": 10.162, + "eval_steps_per_second": 5.081, + "step": 7580 + }, + { + "epoch": 0.99, + "learning_rate": 6.786701125999218e-10, + "logits/chosen": -2.364657163619995, + "logits/rejected": -2.3832263946533203, + "logps/chosen": -484.3373107910156, + "logps/rejected": -493.5921325683594, + "loss": 0.7282, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -1.6983461380004883, + "rewards/margins": 0.2650797963142395, + "rewards/rejected": -1.9634259939193726, + "step": 7590 + }, + { + "epoch": 0.99, + "eval_logits/chosen": -2.2349560260772705, + "eval_logits/rejected": -2.242851495742798, + "eval_logps/chosen": -480.9801940917969, + "eval_logps/rejected": -493.40118408203125, + "eval_loss": 0.6001663208007812, + "eval_rewards/accuracies": 0.675000011920929, + "eval_rewards/chosen": -1.4823077917099, + "eval_rewards/margins": 0.4406040608882904, + "eval_rewards/rejected": -1.9229116439819336, + "eval_runtime": 196.9615, + "eval_samples_per_second": 10.154, + "eval_steps_per_second": 5.077, + "step": 7590 + }, + { + "epoch": 0.99, + "learning_rate": 4.3862465124638873e-10, + "logits/chosen": -2.3418660163879395, + "logits/rejected": -2.384479522705078, + "logps/chosen": -473.34197998046875, + "logps/rejected": -479.49615478515625, + "loss": 0.6687, + "rewards/accuracies": 0.625, + "rewards/chosen": -1.5078928470611572, + "rewards/margins": 0.28139907121658325, + "rewards/rejected": -1.7892920970916748, + "step": 7600 + }, + { + "epoch": 0.99, + "eval_logits/chosen": -2.234585762023926, + "eval_logits/rejected": -2.2425272464752197, + "eval_logps/chosen": -481.1004333496094, + "eval_logps/rejected": -493.569091796875, + "eval_loss": 0.6000926494598389, + "eval_rewards/accuracies": 0.6754999756813049, + "eval_rewards/chosen": -1.48350989818573, + "eval_rewards/margins": 0.4410809576511383, + "eval_rewards/rejected": -1.924590826034546, + "eval_runtime": 197.1425, + "eval_samples_per_second": 10.145, + "eval_steps_per_second": 5.072, + "step": 7600 + }, + { + "epoch": 1.0, + "learning_rate": 2.507576260799005e-10, + "logits/chosen": -2.5632288455963135, + "logits/rejected": -2.518597364425659, + "logps/chosen": -519.2454833984375, + "logps/rejected": -549.087158203125, + "loss": 0.5566, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.4352588653564453, + "rewards/margins": 0.5633091926574707, + "rewards/rejected": -1.9985681772232056, + "step": 7610 + }, + { + "epoch": 1.0, + "eval_logits/chosen": -2.234978437423706, + "eval_logits/rejected": -2.242854118347168, + "eval_logps/chosen": -481.08746337890625, + "eval_logps/rejected": -493.5091857910156, + "eval_loss": 0.6003447771072388, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4833803176879883, + "eval_rewards/margins": 0.44061169028282166, + "eval_rewards/rejected": -1.9239921569824219, + "eval_runtime": 196.9114, + "eval_samples_per_second": 10.157, + "eval_steps_per_second": 5.078, + "step": 7610 + }, + { + "epoch": 1.0, + "learning_rate": 1.1507295883145253e-10, + "logits/chosen": -2.475334644317627, + "logits/rejected": -2.509917736053467, + "logps/chosen": -488.1448669433594, + "logps/rejected": -532.739501953125, + "loss": 0.5543, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.3513810634613037, + "rewards/margins": 0.5696924924850464, + "rewards/rejected": -1.92107355594635, + "step": 7620 + }, + { + "epoch": 1.0, + "eval_logits/chosen": -2.235002040863037, + "eval_logits/rejected": -2.2428789138793945, + "eval_logps/chosen": -481.00299072265625, + "eval_logps/rejected": -493.47991943359375, + "eval_loss": 0.6000400185585022, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -1.4825358390808105, + "eval_rewards/margins": 0.4411628842353821, + "eval_rewards/rejected": -1.9236990213394165, + "eval_runtime": 197.0703, + "eval_samples_per_second": 10.149, + "eval_steps_per_second": 5.074, + "step": 7620 + }, + { + "epoch": 1.0, + "learning_rate": 3.1573481923952156e-11, + "logits/chosen": -2.420581817626953, + "logits/rejected": -2.3721389770507812, + "logps/chosen": -537.7681884765625, + "logps/rejected": -545.4634399414062, + "loss": 0.5888, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.4983834028244019, + "rewards/margins": 0.4905606806278229, + "rewards/rejected": -1.9889440536499023, + "step": 7630 + }, + { + "epoch": 1.0, + "eval_logits/chosen": -2.234976291656494, + "eval_logits/rejected": -2.243018627166748, + "eval_logps/chosen": -480.9837951660156, + "eval_logps/rejected": -493.3998107910156, + "eval_loss": 0.6002876162528992, + "eval_rewards/accuracies": 0.6744999885559082, + "eval_rewards/chosen": -1.4823437929153442, + "eval_rewards/margins": 0.44055426120758057, + "eval_rewards/rejected": -1.9228979349136353, + "eval_runtime": 196.934, + "eval_samples_per_second": 10.156, + "eval_steps_per_second": 5.078, + "step": 7630 + }, + { + "epoch": 1.0, + "learning_rate": 2.609384119889313e-13, + "logits/chosen": -2.3895089626312256, + "logits/rejected": -2.3862829208374023, + "logps/chosen": -467.02752685546875, + "logps/rejected": -502.67852783203125, + "loss": 0.5937, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -1.4769508838653564, + "rewards/margins": 0.4055989384651184, + "rewards/rejected": -1.8825498819351196, + "step": 7640 + }, + { + "epoch": 1.0, + "eval_logits/chosen": -2.2347497940063477, + "eval_logits/rejected": -2.242765188217163, + "eval_logps/chosen": -481.00506591796875, + "eval_logps/rejected": -493.4847717285156, + "eval_loss": 0.599940299987793, + "eval_rewards/accuracies": 0.6735000014305115, + "eval_rewards/chosen": -1.4825562238693237, + "eval_rewards/margins": 0.4411916732788086, + "eval_rewards/rejected": -1.9237478971481323, + "eval_runtime": 196.8595, + "eval_samples_per_second": 10.16, + "eval_steps_per_second": 5.08, + "step": 7640 + }, + { + "epoch": 1.0, + "step": 7641, + "total_flos": 0.0, + "train_loss": 0.6145847465156994, + "train_runtime": 171708.6447, + "train_samples_per_second": 0.356, + "train_steps_per_second": 0.044 + } + ], + "logging_steps": 10, + "max_steps": 7641, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 100, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}