{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.974822466107166, "eval_steps": 100, "global_step": 288, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1.7241379310344825e-08, "logits/chosen": -2.5584306716918945, "logits/rejected": -2.4569621086120605, "logps/chosen": -206.14129638671875, "logps/rejected": -183.71591186523438, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.1, "learning_rate": 1.7241379310344828e-07, "logits/chosen": -2.489280939102173, "logits/rejected": -2.4284753799438477, "logps/chosen": -280.86859130859375, "logps/rejected": -227.53990173339844, "loss": 0.6915, "rewards/accuracies": 0.4791666567325592, "rewards/chosen": 0.0040930104441940784, "rewards/margins": 0.0039877030067145824, "rewards/rejected": 0.000105307022749912, "step": 10 }, { "epoch": 0.21, "learning_rate": 3.4482758620689656e-07, "logits/chosen": -2.539264440536499, "logits/rejected": -2.4608712196350098, "logps/chosen": -269.39410400390625, "logps/rejected": -224.4403533935547, "loss": 0.6938, "rewards/accuracies": 0.47343748807907104, "rewards/chosen": -0.0009769044118002057, "rewards/margins": -0.000444817531388253, "rewards/rejected": -0.0005320868222042918, "step": 20 }, { "epoch": 0.31, "learning_rate": 4.98069498069498e-07, "logits/chosen": -2.5617613792419434, "logits/rejected": -2.4910457134246826, "logps/chosen": -276.52783203125, "logps/rejected": -244.70361328125, "loss": 0.6899, "rewards/accuracies": 0.5453125238418579, "rewards/chosen": 0.003957667388021946, "rewards/margins": 0.0072895376943051815, "rewards/rejected": -0.003331870539113879, "step": 30 }, { "epoch": 0.41, "learning_rate": 4.787644787644788e-07, "logits/chosen": -2.5235114097595215, "logits/rejected": -2.470004081726074, "logps/chosen": -273.86065673828125, "logps/rejected": -238.51052856445312, "loss": 0.6927, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": 0.0006143165519461036, "rewards/margins": 0.0018405301962047815, "rewards/rejected": -0.0012262131785973907, "step": 40 }, { "epoch": 0.52, "learning_rate": 4.594594594594595e-07, "logits/chosen": -2.520514965057373, "logits/rejected": -2.4670748710632324, "logps/chosen": -264.4063415527344, "logps/rejected": -223.18936157226562, "loss": 0.6866, "rewards/accuracies": 0.596875011920929, "rewards/chosen": 0.011737896129488945, "rewards/margins": 0.014031621627509594, "rewards/rejected": -0.00229372619651258, "step": 50 }, { "epoch": 0.62, "learning_rate": 4.4015444015444015e-07, "logits/chosen": -2.5391576290130615, "logits/rejected": -2.5070621967315674, "logps/chosen": -264.4425354003906, "logps/rejected": -213.7183837890625, "loss": 0.6864, "rewards/accuracies": 0.5953124761581421, "rewards/chosen": 0.014557396993041039, "rewards/margins": 0.014714914374053478, "rewards/rejected": -0.0001575160276843235, "step": 60 }, { "epoch": 0.72, "learning_rate": 4.208494208494208e-07, "logits/chosen": -2.4986374378204346, "logits/rejected": -2.46673583984375, "logps/chosen": -268.71917724609375, "logps/rejected": -217.59912109375, "loss": 0.6838, "rewards/accuracies": 0.6109374761581421, "rewards/chosen": 0.01515167485922575, "rewards/margins": 0.020194347947835922, "rewards/rejected": -0.005042673088610172, "step": 70 }, { "epoch": 0.83, "learning_rate": 4.015444015444015e-07, "logits/chosen": -2.522416591644287, "logits/rejected": -2.472027540206909, "logps/chosen": -251.3960723876953, "logps/rejected": -211.45138549804688, "loss": 0.6815, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": 0.02532876469194889, "rewards/margins": 0.02509908750653267, "rewards/rejected": 0.000229675744776614, "step": 80 }, { "epoch": 0.93, "learning_rate": 3.8223938223938225e-07, "logits/chosen": -2.4934628009796143, "logits/rejected": -2.4291629791259766, "logps/chosen": -255.72384643554688, "logps/rejected": -220.0627899169922, "loss": 0.6768, "rewards/accuracies": 0.6578124761581421, "rewards/chosen": 0.0247894749045372, "rewards/margins": 0.03499498590826988, "rewards/rejected": -0.010205509141087532, "step": 90 }, { "epoch": 0.99, "eval_logits/chosen": -2.227679491043091, "eval_logits/rejected": -2.1611783504486084, "eval_logps/chosen": -264.68023681640625, "eval_logps/rejected": -222.43667602539062, "eval_loss": 0.6764230728149414, "eval_rewards/accuracies": 0.6380000114440918, "eval_rewards/chosen": 0.03063117153942585, "eval_rewards/margins": 0.036278609186410904, "eval_rewards/rejected": -0.005647439509630203, "eval_runtime": 2325.01, "eval_samples_per_second": 0.86, "eval_steps_per_second": 0.215, "step": 96 }, { "epoch": 1.03, "learning_rate": 3.629343629343629e-07, "logits/chosen": -2.5082602500915527, "logits/rejected": -2.435072422027588, "logps/chosen": -275.8153991699219, "logps/rejected": -227.73828125, "loss": 0.6755, "rewards/accuracies": 0.6468750238418579, "rewards/chosen": 0.02948899194598198, "rewards/margins": 0.03778881952166557, "rewards/rejected": -0.008299829438328743, "step": 100 }, { "epoch": 1.14, "learning_rate": 3.436293436293436e-07, "logits/chosen": -2.5289313793182373, "logits/rejected": -2.4763355255126953, "logps/chosen": -274.30853271484375, "logps/rejected": -218.49853515625, "loss": 0.6704, "rewards/accuracies": 0.6796875, "rewards/chosen": 0.03949684649705887, "rewards/margins": 0.04895726591348648, "rewards/rejected": -0.009460421279072762, "step": 110 }, { "epoch": 1.24, "learning_rate": 3.243243243243243e-07, "logits/chosen": -2.5224318504333496, "logits/rejected": -2.447321653366089, "logps/chosen": -271.16827392578125, "logps/rejected": -229.1170654296875, "loss": 0.6706, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.03896590322256088, "rewards/margins": 0.048555582761764526, "rewards/rejected": -0.009589677676558495, "step": 120 }, { "epoch": 1.34, "learning_rate": 3.0501930501930503e-07, "logits/chosen": -2.5046470165252686, "logits/rejected": -2.4693236351013184, "logps/chosen": -255.78515625, "logps/rejected": -213.4404754638672, "loss": 0.667, "rewards/accuracies": 0.684374988079071, "rewards/chosen": 0.04456660896539688, "rewards/margins": 0.056311529129743576, "rewards/rejected": -0.011744923889636993, "step": 130 }, { "epoch": 1.45, "learning_rate": 2.857142857142857e-07, "logits/chosen": -2.5624308586120605, "logits/rejected": -2.487908363342285, "logps/chosen": -272.53729248046875, "logps/rejected": -217.79345703125, "loss": 0.6681, "rewards/accuracies": 0.667187511920929, "rewards/chosen": 0.04305075854063034, "rewards/margins": 0.05537617206573486, "rewards/rejected": -0.012325407937169075, "step": 140 }, { "epoch": 1.55, "learning_rate": 2.664092664092664e-07, "logits/chosen": -2.5151801109313965, "logits/rejected": -2.459423065185547, "logps/chosen": -268.97735595703125, "logps/rejected": -231.13119506835938, "loss": 0.663, "rewards/accuracies": 0.6890624761581421, "rewards/chosen": 0.051591645926237106, "rewards/margins": 0.06633338332176208, "rewards/rejected": -0.01474173553287983, "step": 150 }, { "epoch": 1.65, "learning_rate": 2.471042471042471e-07, "logits/chosen": -2.5403904914855957, "logits/rejected": -2.4852936267852783, "logps/chosen": -255.5146484375, "logps/rejected": -223.44631958007812, "loss": 0.6668, "rewards/accuracies": 0.65625, "rewards/chosen": 0.047541543841362, "rewards/margins": 0.058655254542827606, "rewards/rejected": -0.01111371349543333, "step": 160 }, { "epoch": 1.76, "learning_rate": 2.2779922779922777e-07, "logits/chosen": -2.511491298675537, "logits/rejected": -2.454376220703125, "logps/chosen": -260.7484436035156, "logps/rejected": -226.0450439453125, "loss": 0.6666, "rewards/accuracies": 0.645312488079071, "rewards/chosen": 0.04495326429605484, "rewards/margins": 0.06048337370157242, "rewards/rejected": -0.015530114993453026, "step": 170 }, { "epoch": 1.86, "learning_rate": 2.084942084942085e-07, "logits/chosen": -2.5392658710479736, "logits/rejected": -2.471736431121826, "logps/chosen": -273.15655517578125, "logps/rejected": -242.5560760498047, "loss": 0.664, "rewards/accuracies": 0.6578124761581421, "rewards/chosen": 0.05149110406637192, "rewards/margins": 0.06509838253259659, "rewards/rejected": -0.01360728032886982, "step": 180 }, { "epoch": 1.96, "learning_rate": 1.891891891891892e-07, "logits/chosen": -2.512092113494873, "logits/rejected": -2.460084915161133, "logps/chosen": -270.2705993652344, "logps/rejected": -220.6813507080078, "loss": 0.6591, "rewards/accuracies": 0.690625011920929, "rewards/chosen": 0.05787338688969612, "rewards/margins": 0.07586757838726044, "rewards/rejected": -0.017994191497564316, "step": 190 }, { "epoch": 1.99, "eval_logits/chosen": -2.226806879043579, "eval_logits/rejected": -2.160585641860962, "eval_logps/chosen": -264.43328857421875, "eval_logps/rejected": -222.5832061767578, "eval_loss": 0.6596261858940125, "eval_rewards/accuracies": 0.6775000095367432, "eval_rewards/chosen": 0.0553288571536541, "eval_rewards/margins": 0.07563061267137527, "eval_rewards/rejected": -0.020301757380366325, "eval_runtime": 2319.8213, "eval_samples_per_second": 0.862, "eval_steps_per_second": 0.216, "step": 193 }, { "epoch": 2.07, "learning_rate": 1.6988416988416988e-07, "logits/chosen": -2.5378737449645996, "logits/rejected": -2.4820332527160645, "logps/chosen": -282.517822265625, "logps/rejected": -232.89395141601562, "loss": 0.6578, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.05665069818496704, "rewards/margins": 0.07951916754245758, "rewards/rejected": -0.022868463769555092, "step": 200 }, { "epoch": 2.17, "learning_rate": 1.5057915057915057e-07, "logits/chosen": -2.5581231117248535, "logits/rejected": -2.4872212409973145, "logps/chosen": -266.2125244140625, "logps/rejected": -215.0002899169922, "loss": 0.6547, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": 0.059196583926677704, "rewards/margins": 0.08533494174480438, "rewards/rejected": -0.026138361543416977, "step": 210 }, { "epoch": 2.27, "learning_rate": 1.3127413127413127e-07, "logits/chosen": -2.505333185195923, "logits/rejected": -2.419924020767212, "logps/chosen": -257.5528259277344, "logps/rejected": -226.89492797851562, "loss": 0.655, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.053452588617801666, "rewards/margins": 0.08509759604930878, "rewards/rejected": -0.03164501488208771, "step": 220 }, { "epoch": 2.38, "learning_rate": 1.1196911196911196e-07, "logits/chosen": -2.5441360473632812, "logits/rejected": -2.4835963249206543, "logps/chosen": -270.8502502441406, "logps/rejected": -222.893310546875, "loss": 0.6558, "rewards/accuracies": 0.684374988079071, "rewards/chosen": 0.06665085256099701, "rewards/margins": 0.08379445225000381, "rewards/rejected": -0.017143595963716507, "step": 230 }, { "epoch": 2.48, "learning_rate": 9.266409266409266e-08, "logits/chosen": -2.562384843826294, "logits/rejected": -2.5112578868865967, "logps/chosen": -266.3863220214844, "logps/rejected": -231.86294555664062, "loss": 0.6565, "rewards/accuracies": 0.6953125, "rewards/chosen": 0.0619996078312397, "rewards/margins": 0.0826488584280014, "rewards/rejected": -0.020649263635277748, "step": 240 }, { "epoch": 2.58, "learning_rate": 7.335907335907336e-08, "logits/chosen": -2.462939739227295, "logits/rejected": -2.4160337448120117, "logps/chosen": -248.96240234375, "logps/rejected": -223.98739624023438, "loss": 0.6648, "rewards/accuracies": 0.640625, "rewards/chosen": 0.05460253357887268, "rewards/margins": 0.06585252285003662, "rewards/rejected": -0.01124998927116394, "step": 250 }, { "epoch": 2.69, "learning_rate": 5.4054054054054056e-08, "logits/chosen": -2.508650541305542, "logits/rejected": -2.4627132415771484, "logps/chosen": -274.69873046875, "logps/rejected": -226.0441131591797, "loss": 0.6499, "rewards/accuracies": 0.6875, "rewards/chosen": 0.06910406053066254, "rewards/margins": 0.09896949678659439, "rewards/rejected": -0.02986542508006096, "step": 260 }, { "epoch": 2.79, "learning_rate": 3.474903474903475e-08, "logits/chosen": -2.5484378337860107, "logits/rejected": -2.4623117446899414, "logps/chosen": -273.59967041015625, "logps/rejected": -225.9320831298828, "loss": 0.6555, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.061413638293743134, "rewards/margins": 0.08604761958122253, "rewards/rejected": -0.02463398687541485, "step": 270 }, { "epoch": 2.89, "learning_rate": 1.5444015444015443e-08, "logits/chosen": -2.484541416168213, "logits/rejected": -2.43548321723938, "logps/chosen": -265.0757141113281, "logps/rejected": -211.746337890625, "loss": 0.6511, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": 0.07040460407733917, "rewards/margins": 0.09466332197189331, "rewards/rejected": -0.024258721619844437, "step": 280 }, { "epoch": 2.97, "eval_logits/chosen": -2.226133108139038, "eval_logits/rejected": -2.1600077152252197, "eval_logps/chosen": -264.3455505371094, "eval_logps/rejected": -222.62977600097656, "eval_loss": 0.6542457342147827, "eval_rewards/accuracies": 0.6765000224113464, "eval_rewards/chosen": 0.06409955769777298, "eval_rewards/margins": 0.08906211704015732, "eval_rewards/rejected": -0.024962568655610085, "eval_runtime": 2321.5859, "eval_samples_per_second": 0.861, "eval_steps_per_second": 0.215, "step": 288 }, { "epoch": 2.97, "step": 288, "total_flos": 0.0, "train_loss": 0.6692726473427482, "train_runtime": 42824.2925, "train_samples_per_second": 0.434, "train_steps_per_second": 0.007 } ], "logging_steps": 10, "max_steps": 288, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }