{ "best_metric": 0.5855222344398499, "best_model_checkpoint": "data/tinyllama_moe_dpo_ultrafeedback_v2_epochs3/checkpoint-2700", "epoch": 2.998430141287284, "eval_steps": 100, "global_step": 2865, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.208333333333333e-09, "logits/chosen": -3.0106258392333984, "logits/rejected": -3.0041162967681885, "logps/chosen": -291.6616516113281, "logps/rejected": -273.537353515625, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 5.208333333333333e-08, "logits/chosen": -3.0320074558258057, "logits/rejected": -2.934544801712036, "logps/chosen": -352.8655090332031, "logps/rejected": -284.1784362792969, "loss": 0.6931, "rewards/accuracies": 0.4236111044883728, "rewards/chosen": -0.00030098477145656943, "rewards/margins": 6.371454219333827e-05, "rewards/rejected": -0.00036469934275373816, "step": 10 }, { "epoch": 0.02, "learning_rate": 1.0416666666666667e-07, "logits/chosen": -2.9808428287506104, "logits/rejected": -2.9612295627593994, "logps/chosen": -309.6392822265625, "logps/rejected": -278.2618103027344, "loss": 0.6931, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0001357152796117589, "rewards/margins": 0.00045497194514609873, "rewards/rejected": -0.0003192565927747637, "step": 20 }, { "epoch": 0.03, "learning_rate": 1.5624999999999999e-07, "logits/chosen": -3.041250705718994, "logits/rejected": -2.9839859008789062, "logps/chosen": -342.7677917480469, "logps/rejected": -301.0032653808594, "loss": 0.6933, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.00031170996953733265, "rewards/margins": -0.00037957567838020623, "rewards/rejected": 6.78658252581954e-05, "step": 30 }, { "epoch": 0.04, "learning_rate": 2.0833333333333333e-07, "logits/chosen": -3.019380807876587, "logits/rejected": -2.974083423614502, "logps/chosen": -331.848876953125, "logps/rejected": -276.879150390625, "loss": 0.6933, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.00034866915666498244, "rewards/margins": -0.00015269347932189703, "rewards/rejected": -0.00019597564823925495, "step": 40 }, { "epoch": 0.05, "learning_rate": 2.604166666666667e-07, "logits/chosen": -3.025217294692993, "logits/rejected": -2.984266757965088, "logps/chosen": -347.17523193359375, "logps/rejected": -309.79034423828125, "loss": 0.6932, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.0004488878767006099, "rewards/margins": -0.0009045412880368531, "rewards/rejected": 0.0004556533822324127, "step": 50 }, { "epoch": 0.06, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -3.008871078491211, "logits/rejected": -2.947890281677246, "logps/chosen": -348.37127685546875, "logps/rejected": -318.5699462890625, "loss": 0.6931, "rewards/accuracies": 0.46875, "rewards/chosen": -1.417404564563185e-05, "rewards/margins": -0.00020001048687845469, "rewards/rejected": 0.0001858363684732467, "step": 60 }, { "epoch": 0.07, "learning_rate": 3.645833333333333e-07, "logits/chosen": -3.078416585922241, "logits/rejected": -3.026381492614746, "logps/chosen": -382.86102294921875, "logps/rejected": -335.41156005859375, "loss": 0.6928, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": 0.0005409394507296383, "rewards/margins": 0.0006863707094453275, "rewards/rejected": -0.00014543140423484147, "step": 70 }, { "epoch": 0.08, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -2.9282450675964355, "logits/rejected": -2.872313976287842, "logps/chosen": -355.75653076171875, "logps/rejected": -294.4638366699219, "loss": 0.6925, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0010749772191047668, "rewards/margins": 0.0013450259575620294, "rewards/rejected": -0.0002700486802496016, "step": 80 }, { "epoch": 0.09, "learning_rate": 4.6874999999999996e-07, "logits/chosen": -3.0248095989227295, "logits/rejected": -2.9788899421691895, "logps/chosen": -348.9649963378906, "logps/rejected": -311.44647216796875, "loss": 0.6918, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.0013544766698032618, "rewards/margins": 0.0028665403369814157, "rewards/rejected": -0.001512063667178154, "step": 90 }, { "epoch": 0.1, "learning_rate": 4.999974255581648e-07, "logits/chosen": -2.998736619949341, "logits/rejected": -2.972041606903076, "logps/chosen": -367.4033203125, "logps/rejected": -341.64483642578125, "loss": 0.6914, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.00238443398848176, "rewards/margins": 0.0027559935115277767, "rewards/rejected": -0.00037155949394218624, "step": 100 }, { "epoch": 0.1, "eval_logits/chosen": -3.040524482727051, "eval_logits/rejected": -2.987576484680176, "eval_logps/chosen": -344.61151123046875, "eval_logps/rejected": -302.75537109375, "eval_loss": 0.6913198828697205, "eval_rewards/accuracies": 0.6349206566810608, "eval_rewards/chosen": 0.0043304311111569405, "eval_rewards/margins": 0.004837073851376772, "eval_rewards/rejected": -0.0005066432058811188, "eval_runtime": 244.6034, "eval_samples_per_second": 8.177, "eval_steps_per_second": 0.258, "step": 100 }, { "epoch": 0.12, "learning_rate": 4.999684636964402e-07, "logits/chosen": -3.017376661300659, "logits/rejected": -2.9261746406555176, "logps/chosen": -315.4708557128906, "logps/rejected": -251.8899383544922, "loss": 0.6908, "rewards/accuracies": 0.581250011920929, "rewards/chosen": 0.0034377477131783962, "rewards/margins": 0.0038212400395423174, "rewards/rejected": -0.00038349232636392117, "step": 110 }, { "epoch": 0.13, "learning_rate": 4.999073256611217e-07, "logits/chosen": -2.9883151054382324, "logits/rejected": -2.910341262817383, "logps/chosen": -361.08331298828125, "logps/rejected": -289.36883544921875, "loss": 0.6904, "rewards/accuracies": 0.625, "rewards/chosen": 0.007036352995783091, "rewards/margins": 0.008973561227321625, "rewards/rejected": -0.0019372075330466032, "step": 120 }, { "epoch": 0.14, "learning_rate": 4.998140193219545e-07, "logits/chosen": -3.0366291999816895, "logits/rejected": -2.9711904525756836, "logps/chosen": -381.4322204589844, "logps/rejected": -310.4883728027344, "loss": 0.6896, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.00792085099965334, "rewards/margins": 0.0068405852653086185, "rewards/rejected": 0.0010802658507600427, "step": 130 }, { "epoch": 0.15, "learning_rate": 4.996885566894172e-07, "logits/chosen": -2.9757721424102783, "logits/rejected": -2.955821990966797, "logps/chosen": -286.1941833496094, "logps/rejected": -260.9352111816406, "loss": 0.6888, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.007854754105210304, "rewards/margins": 0.008457413874566555, "rewards/rejected": -0.0006026608753018081, "step": 140 }, { "epoch": 0.16, "learning_rate": 4.995309539131771e-07, "logits/chosen": -3.035512685775757, "logits/rejected": -3.00437593460083, "logps/chosen": -335.96539306640625, "logps/rejected": -335.1185607910156, "loss": 0.6891, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.00691782683134079, "rewards/margins": 0.003046808298677206, "rewards/rejected": 0.0038710187654942274, "step": 150 }, { "epoch": 0.17, "learning_rate": 4.993412312800101e-07, "logits/chosen": -3.0231873989105225, "logits/rejected": -2.9608006477355957, "logps/chosen": -354.9385681152344, "logps/rejected": -332.57757568359375, "loss": 0.687, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.010783755220472813, "rewards/margins": 0.010257494635879993, "rewards/rejected": 0.0005262610502541065, "step": 160 }, { "epoch": 0.18, "learning_rate": 4.991194132111906e-07, "logits/chosen": -3.037797212600708, "logits/rejected": -2.991992473602295, "logps/chosen": -325.5256042480469, "logps/rejected": -280.1507263183594, "loss": 0.686, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.015455600805580616, "rewards/margins": 0.014618036337196827, "rewards/rejected": 0.0008375659817829728, "step": 170 }, { "epoch": 0.19, "learning_rate": 4.988655282593471e-07, "logits/chosen": -2.967064619064331, "logits/rejected": -2.900017261505127, "logps/chosen": -299.7720947265625, "logps/rejected": -267.1083984375, "loss": 0.6848, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.011125795543193817, "rewards/margins": 0.019041184335947037, "rewards/rejected": -0.00791538879275322, "step": 180 }, { "epoch": 0.2, "learning_rate": 4.985796091047871e-07, "logits/chosen": -3.0344669818878174, "logits/rejected": -2.965481996536255, "logps/chosen": -342.3845520019531, "logps/rejected": -299.60418701171875, "loss": 0.6836, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.017673885449767113, "rewards/margins": 0.02089458890259266, "rewards/rejected": -0.0032207041513174772, "step": 190 }, { "epoch": 0.21, "learning_rate": 4.982616925512907e-07, "logits/chosen": -2.9737305641174316, "logits/rejected": -2.9367403984069824, "logps/chosen": -344.54833984375, "logps/rejected": -315.73516845703125, "loss": 0.6836, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.02112628147006035, "rewards/margins": 0.02438109926879406, "rewards/rejected": -0.0032548182643949986, "step": 200 }, { "epoch": 0.21, "eval_logits/chosen": -3.024318218231201, "eval_logits/rejected": -2.9700303077697754, "eval_logps/chosen": -343.5496826171875, "eval_logps/rejected": -303.65081787109375, "eval_loss": 0.6830371022224426, "eval_rewards/accuracies": 0.6448412537574768, "eval_rewards/chosen": 0.01494832057505846, "eval_rewards/margins": 0.0244095791131258, "eval_rewards/rejected": -0.009461257606744766, "eval_runtime": 246.2124, "eval_samples_per_second": 8.123, "eval_steps_per_second": 0.256, "step": 200 }, { "epoch": 0.22, "learning_rate": 4.979118195213728e-07, "logits/chosen": -3.0241358280181885, "logits/rejected": -2.944836378097534, "logps/chosen": -367.56805419921875, "logps/rejected": -298.8295593261719, "loss": 0.682, "rewards/accuracies": 0.606249988079071, "rewards/chosen": 0.01484000962227583, "rewards/margins": 0.023524824529886246, "rewards/rejected": -0.008684814907610416, "step": 210 }, { "epoch": 0.23, "learning_rate": 4.975300350510161e-07, "logits/chosen": -3.002933979034424, "logits/rejected": -2.9467215538024902, "logps/chosen": -350.8382263183594, "logps/rejected": -319.9858703613281, "loss": 0.682, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.01789170131087303, "rewards/margins": 0.02975825034081936, "rewards/rejected": -0.011866547167301178, "step": 220 }, { "epoch": 0.24, "learning_rate": 4.971163882838732e-07, "logits/chosen": -2.9935097694396973, "logits/rejected": -2.9317831993103027, "logps/chosen": -349.8140563964844, "logps/rejected": -288.98077392578125, "loss": 0.6769, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.011691467836499214, "rewards/margins": 0.029249072074890137, "rewards/rejected": -0.017557602375745773, "step": 230 }, { "epoch": 0.25, "learning_rate": 4.966709324649415e-07, "logits/chosen": -2.9749813079833984, "logits/rejected": -2.9110968112945557, "logps/chosen": -335.0533447265625, "logps/rejected": -266.52215576171875, "loss": 0.6787, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.01340649463236332, "rewards/margins": 0.027955498546361923, "rewards/rejected": -0.014549002051353455, "step": 240 }, { "epoch": 0.26, "learning_rate": 4.961937249337091e-07, "logits/chosen": -2.9835305213928223, "logits/rejected": -2.9534316062927246, "logps/chosen": -320.1576232910156, "logps/rejected": -320.8951721191406, "loss": 0.68, "rewards/accuracies": 0.65625, "rewards/chosen": 0.009979739785194397, "rewards/margins": 0.03077465295791626, "rewards/rejected": -0.020794907584786415, "step": 250 }, { "epoch": 0.27, "learning_rate": 4.956848271167743e-07, "logits/chosen": -2.9606919288635254, "logits/rejected": -2.9051098823547363, "logps/chosen": -341.62701416015625, "logps/rejected": -306.46734619140625, "loss": 0.6771, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.004492092411965132, "rewards/margins": 0.0419074110686779, "rewards/rejected": -0.03741531819105148, "step": 260 }, { "epoch": 0.28, "learning_rate": 4.951443045199382e-07, "logits/chosen": -3.039965867996216, "logits/rejected": -2.974046230316162, "logps/chosen": -353.82366943359375, "logps/rejected": -296.0869140625, "loss": 0.6743, "rewards/accuracies": 0.71875, "rewards/chosen": 0.013023038394749165, "rewards/margins": 0.053806107491254807, "rewards/rejected": -0.04078306630253792, "step": 270 }, { "epoch": 0.29, "learning_rate": 4.945722267197731e-07, "logits/chosen": -3.0149953365325928, "logits/rejected": -2.9855613708496094, "logps/chosen": -353.22808837890625, "logps/rejected": -332.72735595703125, "loss": 0.6759, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.0013278704136610031, "rewards/margins": 0.0351131446659565, "rewards/rejected": -0.03644100949168205, "step": 280 }, { "epoch": 0.3, "learning_rate": 4.939686673546668e-07, "logits/chosen": -2.979219675064087, "logits/rejected": -2.940520763397217, "logps/chosen": -338.106201171875, "logps/rejected": -298.94110107421875, "loss": 0.6718, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.008899662643671036, "rewards/margins": 0.0386703684926033, "rewards/rejected": -0.047570034861564636, "step": 290 }, { "epoch": 0.31, "learning_rate": 4.93333704115343e-07, "logits/chosen": -2.9941365718841553, "logits/rejected": -2.884251117706299, "logps/chosen": -341.53387451171875, "logps/rejected": -282.03814697265625, "loss": 0.6662, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.00807519443333149, "rewards/margins": 0.05831586569547653, "rewards/rejected": -0.06639105826616287, "step": 300 }, { "epoch": 0.31, "eval_logits/chosen": -2.9976212978363037, "eval_logits/rejected": -2.942260980606079, "eval_logps/chosen": -346.3836364746094, "eval_logps/rejected": -309.570068359375, "eval_loss": 0.6711614727973938, "eval_rewards/accuracies": 0.6746031641960144, "eval_rewards/chosen": -0.013390865176916122, "eval_rewards/margins": 0.05526304244995117, "eval_rewards/rejected": -0.06865391135215759, "eval_runtime": 248.4655, "eval_samples_per_second": 8.049, "eval_steps_per_second": 0.254, "step": 300 }, { "epoch": 0.32, "learning_rate": 4.926674187348617e-07, "logits/chosen": -2.9200305938720703, "logits/rejected": -2.890380382537842, "logps/chosen": -342.61090087890625, "logps/rejected": -319.4290466308594, "loss": 0.6655, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.016244180500507355, "rewards/margins": 0.06994068622589111, "rewards/rejected": -0.08618486672639847, "step": 310 }, { "epoch": 0.33, "learning_rate": 4.91969896978098e-07, "logits/chosen": -2.929490804672241, "logits/rejected": -2.892176866531372, "logps/chosen": -334.2842712402344, "logps/rejected": -318.81976318359375, "loss": 0.6658, "rewards/accuracies": 0.59375, "rewards/chosen": -0.013753254897892475, "rewards/margins": 0.05771704763174057, "rewards/rejected": -0.07147030532360077, "step": 320 }, { "epoch": 0.35, "learning_rate": 4.912412286307025e-07, "logits/chosen": -2.908052921295166, "logits/rejected": -2.8586883544921875, "logps/chosen": -334.84564208984375, "logps/rejected": -278.5325927734375, "loss": 0.6698, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.014625480398535728, "rewards/margins": 0.06231003254652023, "rewards/rejected": -0.07693551480770111, "step": 330 }, { "epoch": 0.36, "learning_rate": 4.904815074875438e-07, "logits/chosen": -2.9351956844329834, "logits/rejected": -2.882476806640625, "logps/chosen": -297.5643310546875, "logps/rejected": -270.01666259765625, "loss": 0.6703, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.05379326269030571, "rewards/margins": 0.038004521280527115, "rewards/rejected": -0.09179778397083282, "step": 340 }, { "epoch": 0.37, "learning_rate": 4.896908313406355e-07, "logits/chosen": -2.9030866622924805, "logits/rejected": -2.8945038318634033, "logps/chosen": -336.22027587890625, "logps/rejected": -334.49365234375, "loss": 0.6683, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05760540813207626, "rewards/margins": 0.0570923313498497, "rewards/rejected": -0.11469773948192596, "step": 350 }, { "epoch": 0.38, "learning_rate": 4.88869301966548e-07, "logits/chosen": -2.961442232131958, "logits/rejected": -2.9141147136688232, "logps/chosen": -337.873046875, "logps/rejected": -292.5345153808594, "loss": 0.6599, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.07577836513519287, "rewards/margins": 0.0639306977391243, "rewards/rejected": -0.13970905542373657, "step": 360 }, { "epoch": 0.39, "learning_rate": 4.880170251133081e-07, "logits/chosen": -2.8861405849456787, "logits/rejected": -2.873425006866455, "logps/chosen": -290.9164123535156, "logps/rejected": -303.1529846191406, "loss": 0.6579, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0845533087849617, "rewards/margins": 0.07466953992843628, "rewards/rejected": -0.15922284126281738, "step": 370 }, { "epoch": 0.4, "learning_rate": 4.871341104867864e-07, "logits/chosen": -2.9816460609436035, "logits/rejected": -2.9324944019317627, "logps/chosen": -363.70806884765625, "logps/rejected": -322.94158935546875, "loss": 0.6615, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.07034312188625336, "rewards/margins": 0.08779822289943695, "rewards/rejected": -0.15814131498336792, "step": 380 }, { "epoch": 0.41, "learning_rate": 4.862206717365765e-07, "logits/chosen": -2.8958492279052734, "logits/rejected": -2.8334128856658936, "logps/chosen": -334.65643310546875, "logps/rejected": -287.338134765625, "loss": 0.6634, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.06828784197568893, "rewards/margins": 0.08057762682437897, "rewards/rejected": -0.1488654762506485, "step": 390 }, { "epoch": 0.42, "learning_rate": 4.852768264413655e-07, "logits/chosen": -2.973942756652832, "logits/rejected": -2.9239089488983154, "logps/chosen": -374.6542663574219, "logps/rejected": -325.08868408203125, "loss": 0.6538, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.0820477306842804, "rewards/margins": 0.0960758849978447, "rewards/rejected": -0.1781235933303833, "step": 400 }, { "epoch": 0.42, "eval_logits/chosen": -2.954821825027466, "eval_logits/rejected": -2.897944450378418, "eval_logps/chosen": -353.1802062988281, "eval_logps/rejected": -320.7437744140625, "eval_loss": 0.6571324467658997, "eval_rewards/accuracies": 0.6765872836112976, "eval_rewards/chosen": -0.08135689049959183, "eval_rewards/margins": 0.09903378784656525, "eval_rewards/rejected": -0.18039065599441528, "eval_runtime": 240.8082, "eval_samples_per_second": 8.305, "eval_steps_per_second": 0.262, "step": 400 }, { "epoch": 0.43, "learning_rate": 4.843026960937995e-07, "logits/chosen": -3.005959987640381, "logits/rejected": -2.960634708404541, "logps/chosen": -376.323974609375, "logps/rejected": -351.9470520019531, "loss": 0.6532, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0597182922065258, "rewards/margins": 0.10858096927404404, "rewards/rejected": -0.16829927265644073, "step": 410 }, { "epoch": 0.44, "learning_rate": 4.832984060848445e-07, "logits/chosen": -2.883970260620117, "logits/rejected": -2.8291678428649902, "logps/chosen": -314.81866455078125, "logps/rejected": -272.30511474609375, "loss": 0.652, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.0751439556479454, "rewards/margins": 0.10195841640233994, "rewards/rejected": -0.17710237205028534, "step": 420 }, { "epoch": 0.45, "learning_rate": 4.822640856876464e-07, "logits/chosen": -2.9058735370635986, "logits/rejected": -2.877462863922119, "logps/chosen": -320.7926940917969, "logps/rejected": -292.57293701171875, "loss": 0.6553, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.09108451753854752, "rewards/margins": 0.0795835480093956, "rewards/rejected": -0.1706680804491043, "step": 430 }, { "epoch": 0.46, "learning_rate": 4.811998680408907e-07, "logits/chosen": -2.9125401973724365, "logits/rejected": -2.903066873550415, "logps/chosen": -323.45245361328125, "logps/rejected": -306.0644226074219, "loss": 0.6524, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.13419905304908752, "rewards/margins": 0.06887730956077576, "rewards/rejected": -0.2030763328075409, "step": 440 }, { "epoch": 0.47, "learning_rate": 4.801058901316645e-07, "logits/chosen": -2.8140056133270264, "logits/rejected": -2.7843329906463623, "logps/chosen": -326.67822265625, "logps/rejected": -301.6144714355469, "loss": 0.647, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.11106200516223907, "rewards/margins": 0.14083310961723328, "rewards/rejected": -0.25189512968063354, "step": 450 }, { "epoch": 0.48, "learning_rate": 4.78982292777824e-07, "logits/chosen": -2.905949354171753, "logits/rejected": -2.8415050506591797, "logps/chosen": -332.97406005859375, "logps/rejected": -316.91937255859375, "loss": 0.6423, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.121739961206913, "rewards/margins": 0.10120918601751328, "rewards/rejected": -0.22294914722442627, "step": 460 }, { "epoch": 0.49, "learning_rate": 4.778292206098673e-07, "logits/chosen": -2.9177937507629395, "logits/rejected": -2.8414828777313232, "logps/chosen": -376.5325927734375, "logps/rejected": -326.441162109375, "loss": 0.6372, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.11956751346588135, "rewards/margins": 0.19230221211910248, "rewards/rejected": -0.311869740486145, "step": 470 }, { "epoch": 0.5, "learning_rate": 4.7664682205231877e-07, "logits/chosen": -2.83701753616333, "logits/rejected": -2.8011679649353027, "logps/chosen": -291.61614990234375, "logps/rejected": -290.9491882324219, "loss": 0.6612, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.15953049063682556, "rewards/margins": 0.07381532341241837, "rewards/rejected": -0.23334583640098572, "step": 480 }, { "epoch": 0.51, "learning_rate": 4.754352493046224e-07, "logits/chosen": -2.9087753295898438, "logits/rejected": -2.8327887058258057, "logps/chosen": -348.1717224121094, "logps/rejected": -318.3050842285156, "loss": 0.6416, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.16357417404651642, "rewards/margins": 0.13152363896369934, "rewards/rejected": -0.29509779810905457, "step": 490 }, { "epoch": 0.52, "learning_rate": 4.741946583215514e-07, "logits/chosen": -2.8774545192718506, "logits/rejected": -2.8319413661956787, "logps/chosen": -337.0196838378906, "logps/rejected": -319.65899658203125, "loss": 0.6405, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17791475355625153, "rewards/margins": 0.15151168406009674, "rewards/rejected": -0.32942643761634827, "step": 500 }, { "epoch": 0.52, "eval_logits/chosen": -2.9120142459869385, "eval_logits/rejected": -2.8540806770324707, "eval_logps/chosen": -364.5343933105469, "eval_logps/rejected": -337.2181396484375, "eval_loss": 0.6447514891624451, "eval_rewards/accuracies": 0.6726190447807312, "eval_rewards/chosen": -0.19489827752113342, "eval_rewards/margins": 0.1502356082201004, "eval_rewards/rejected": -0.345133900642395, "eval_runtime": 248.0229, "eval_samples_per_second": 8.064, "eval_steps_per_second": 0.254, "step": 500 }, { "epoch": 0.53, "learning_rate": 4.729252087931332e-07, "logits/chosen": -2.8862080574035645, "logits/rejected": -2.8038384914398193, "logps/chosen": -377.8186340332031, "logps/rejected": -323.3064880371094, "loss": 0.6386, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17217716574668884, "rewards/margins": 0.19356265664100647, "rewards/rejected": -0.3657398223876953, "step": 510 }, { "epoch": 0.54, "learning_rate": 4.716270641240941e-07, "logits/chosen": -2.8480124473571777, "logits/rejected": -2.811933994293213, "logps/chosen": -320.3323974609375, "logps/rejected": -317.38763427734375, "loss": 0.6423, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.148501917719841, "rewards/margins": 0.18107430636882782, "rewards/rejected": -0.3295762240886688, "step": 520 }, { "epoch": 0.55, "learning_rate": 4.703003914128258e-07, "logits/chosen": -2.847687244415283, "logits/rejected": -2.8126273155212402, "logps/chosen": -355.64996337890625, "logps/rejected": -330.8966369628906, "loss": 0.6401, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.15957526862621307, "rewards/margins": 0.15057061612606049, "rewards/rejected": -0.31014585494995117, "step": 530 }, { "epoch": 0.57, "learning_rate": 4.689453614298758e-07, "logits/chosen": -2.8674135208129883, "logits/rejected": -2.8252620697021484, "logps/chosen": -375.3172302246094, "logps/rejected": -377.0331726074219, "loss": 0.6343, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.169221892952919, "rewards/margins": 0.15300126373767853, "rewards/rejected": -0.32222312688827515, "step": 540 }, { "epoch": 0.58, "learning_rate": 4.6756214859596645e-07, "logits/chosen": -2.8661575317382812, "logits/rejected": -2.80385684967041, "logps/chosen": -347.62969970703125, "logps/rejected": -327.3890380859375, "loss": 0.6463, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.24611690640449524, "rewards/margins": 0.16446547210216522, "rewards/rejected": -0.41058236360549927, "step": 550 }, { "epoch": 0.59, "learning_rate": 4.661509309595426e-07, "logits/chosen": -2.8666725158691406, "logits/rejected": -2.801342487335205, "logps/chosen": -344.13330078125, "logps/rejected": -313.7489929199219, "loss": 0.6312, "rewards/accuracies": 0.6875, "rewards/chosen": -0.24062931537628174, "rewards/margins": 0.20030991733074188, "rewards/rejected": -0.4409392476081848, "step": 560 }, { "epoch": 0.6, "learning_rate": 4.647118901738537e-07, "logits/chosen": -2.8669309616088867, "logits/rejected": -2.7961795330047607, "logps/chosen": -360.96478271484375, "logps/rejected": -339.89886474609375, "loss": 0.644, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.25550180673599243, "rewards/margins": 0.14098383486270905, "rewards/rejected": -0.39648565649986267, "step": 570 }, { "epoch": 0.61, "learning_rate": 4.632452114735706e-07, "logits/chosen": -2.814235210418701, "logits/rejected": -2.757559061050415, "logps/chosen": -350.2564697265625, "logps/rejected": -310.9596252441406, "loss": 0.6359, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.24496586620807648, "rewards/margins": 0.14976012706756592, "rewards/rejected": -0.3947260081768036, "step": 580 }, { "epoch": 0.62, "learning_rate": 4.617510836509424e-07, "logits/chosen": -2.8700594902038574, "logits/rejected": -2.832390069961548, "logps/chosen": -341.0650329589844, "logps/rejected": -324.2391357421875, "loss": 0.638, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.1659560203552246, "rewards/margins": 0.10084130614995956, "rewards/rejected": -0.26679736375808716, "step": 590 }, { "epoch": 0.63, "learning_rate": 4.602296990314949e-07, "logits/chosen": -2.8588919639587402, "logits/rejected": -2.809976100921631, "logps/chosen": -410.419189453125, "logps/rejected": -396.41949462890625, "loss": 0.6394, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17672498524188995, "rewards/margins": 0.19581544399261475, "rewards/rejected": -0.3725404143333435, "step": 600 }, { "epoch": 0.63, "eval_logits/chosen": -2.8732762336730957, "eval_logits/rejected": -2.814659595489502, "eval_logps/chosen": -368.0754089355469, "eval_logps/rejected": -344.1863098144531, "eval_loss": 0.6372315883636475, "eval_rewards/accuracies": 0.682539701461792, "eval_rewards/chosen": -0.2303084284067154, "eval_rewards/margins": 0.18450765311717987, "eval_rewards/rejected": -0.41481611132621765, "eval_runtime": 248.1999, "eval_samples_per_second": 8.058, "eval_steps_per_second": 0.254, "step": 600 }, { "epoch": 0.64, "learning_rate": 4.5868125344927397e-07, "logits/chosen": -2.881749153137207, "logits/rejected": -2.810695171356201, "logps/chosen": -356.07464599609375, "logps/rejected": -302.9112854003906, "loss": 0.6283, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22698974609375, "rewards/margins": 0.1910962164402008, "rewards/rejected": -0.4180859625339508, "step": 610 }, { "epoch": 0.65, "learning_rate": 4.5710594622163814e-07, "logits/chosen": -2.8739027976989746, "logits/rejected": -2.8048624992370605, "logps/chosen": -364.158935546875, "logps/rejected": -319.1551208496094, "loss": 0.6286, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.29370805621147156, "rewards/margins": 0.15704287588596344, "rewards/rejected": -0.4507509171962738, "step": 620 }, { "epoch": 0.66, "learning_rate": 4.555039801236017e-07, "logits/chosen": -2.746525287628174, "logits/rejected": -2.707292079925537, "logps/chosen": -338.1776123046875, "logps/rejected": -335.0467224121094, "loss": 0.6272, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.21730093657970428, "rewards/margins": 0.19230973720550537, "rewards/rejected": -0.40961068868637085, "step": 630 }, { "epoch": 0.67, "learning_rate": 4.538755613617336e-07, "logits/chosen": -2.791337251663208, "logits/rejected": -2.7343456745147705, "logps/chosen": -348.16802978515625, "logps/rejected": -334.0476989746094, "loss": 0.6273, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.23033007979393005, "rewards/margins": 0.19607330858707428, "rewards/rejected": -0.42640337347984314, "step": 640 }, { "epoch": 0.68, "learning_rate": 4.522208995476145e-07, "logits/chosen": -2.889570713043213, "logits/rejected": -2.7822928428649902, "logps/chosen": -401.09234619140625, "logps/rejected": -364.5537414550781, "loss": 0.6171, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.23586571216583252, "rewards/margins": 0.25756314396858215, "rewards/rejected": -0.4934287965297699, "step": 650 }, { "epoch": 0.69, "learning_rate": 4.50540207670855e-07, "logits/chosen": -2.8644819259643555, "logits/rejected": -2.8281166553497314, "logps/chosen": -379.15887451171875, "logps/rejected": -360.2496337890625, "loss": 0.6235, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.24248214066028595, "rewards/margins": 0.2142285406589508, "rewards/rejected": -0.45671066641807556, "step": 660 }, { "epoch": 0.7, "learning_rate": 4.488337020716798e-07, "logits/chosen": -2.8308560848236084, "logits/rejected": -2.8049094676971436, "logps/chosen": -365.39263916015625, "logps/rejected": -360.0053405761719, "loss": 0.6242, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.26678240299224854, "rewards/margins": 0.21639254689216614, "rewards/rejected": -0.4831749498844147, "step": 670 }, { "epoch": 0.71, "learning_rate": 4.4710160241307993e-07, "logits/chosen": -2.765575885772705, "logits/rejected": -2.7461342811584473, "logps/chosen": -347.0950012207031, "logps/rejected": -324.2687683105469, "loss": 0.6322, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.3175194561481476, "rewards/margins": 0.09245363622903824, "rewards/rejected": -0.4099730849266052, "step": 680 }, { "epoch": 0.72, "learning_rate": 4.453441316525376e-07, "logits/chosen": -2.7600603103637695, "logits/rejected": -2.700854539871216, "logps/chosen": -349.8055725097656, "logps/rejected": -332.6409606933594, "loss": 0.63, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.3004549443721771, "rewards/margins": 0.1717255860567093, "rewards/rejected": -0.4721805155277252, "step": 690 }, { "epoch": 0.73, "learning_rate": 4.435615160133268e-07, "logits/chosen": -2.791268825531006, "logits/rejected": -2.7100212574005127, "logps/chosen": -335.6754150390625, "logps/rejected": -332.56280517578125, "loss": 0.6218, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.34759417176246643, "rewards/margins": 0.21199102699756622, "rewards/rejected": -0.5595852136611938, "step": 700 }, { "epoch": 0.73, "eval_logits/chosen": -2.8268723487854004, "eval_logits/rejected": -2.766594648361206, "eval_logps/chosen": -373.9845275878906, "eval_logps/rejected": -353.7791748046875, "eval_loss": 0.631250262260437, "eval_rewards/accuracies": 0.682539701461792, "eval_rewards/chosen": -0.28939977288246155, "eval_rewards/margins": 0.22134484350681305, "eval_rewards/rejected": -0.5107446312904358, "eval_runtime": 250.2322, "eval_samples_per_second": 7.993, "eval_steps_per_second": 0.252, "step": 700 }, { "epoch": 0.74, "learning_rate": 4.4175398495539397e-07, "logits/chosen": -2.8154489994049072, "logits/rejected": -2.7193312644958496, "logps/chosen": -390.2218322753906, "logps/rejected": -323.1303405761719, "loss": 0.6142, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.25868695974349976, "rewards/margins": 0.22367513179779053, "rewards/rejected": -0.48236212134361267, "step": 710 }, { "epoch": 0.75, "learning_rate": 4.3992177114582117e-07, "logits/chosen": -2.8137097358703613, "logits/rejected": -2.7677464485168457, "logps/chosen": -371.6590576171875, "logps/rejected": -349.91522216796875, "loss": 0.6315, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.24545975029468536, "rewards/margins": 0.20291368663311005, "rewards/rejected": -0.4483734965324402, "step": 720 }, { "epoch": 0.76, "learning_rate": 4.380651104288776e-07, "logits/chosen": -2.79219913482666, "logits/rejected": -2.7212865352630615, "logps/chosen": -383.16070556640625, "logps/rejected": -343.8384094238281, "loss": 0.6285, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.2518269717693329, "rewards/margins": 0.21711906790733337, "rewards/rejected": -0.46894603967666626, "step": 730 }, { "epoch": 0.77, "learning_rate": 4.3618424179566094e-07, "logits/chosen": -2.7794361114501953, "logits/rejected": -2.7013282775878906, "logps/chosen": -409.0721130371094, "logps/rejected": -345.78033447265625, "loss": 0.6197, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.23068316280841827, "rewards/margins": 0.23051229119300842, "rewards/rejected": -0.4611954689025879, "step": 740 }, { "epoch": 0.78, "learning_rate": 4.3427940735333436e-07, "logits/chosen": -2.7824223041534424, "logits/rejected": -2.7694931030273438, "logps/chosen": -373.7041931152344, "logps/rejected": -375.1035461425781, "loss": 0.6172, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.33885717391967773, "rewards/margins": 0.19208331406116486, "rewards/rejected": -0.5309404134750366, "step": 750 }, { "epoch": 0.8, "learning_rate": 4.323508522939624e-07, "logits/chosen": -2.750168800354004, "logits/rejected": -2.710522174835205, "logps/chosen": -366.13519287109375, "logps/rejected": -355.31829833984375, "loss": 0.6092, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.4173508286476135, "rewards/margins": 0.22161607444286346, "rewards/rejected": -0.6389669179916382, "step": 760 }, { "epoch": 0.81, "learning_rate": 4.3039882486294966e-07, "logits/chosen": -2.7729387283325195, "logits/rejected": -2.747562885284424, "logps/chosen": -393.59881591796875, "logps/rejected": -406.81768798828125, "loss": 0.6199, "rewards/accuracies": 0.65625, "rewards/chosen": -0.38891881704330444, "rewards/margins": 0.215108722448349, "rewards/rejected": -0.604027509689331, "step": 770 }, { "epoch": 0.82, "learning_rate": 4.2842357632708603e-07, "logits/chosen": -2.7065768241882324, "logits/rejected": -2.6670963764190674, "logps/chosen": -340.0065002441406, "logps/rejected": -324.4345397949219, "loss": 0.6215, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.2909296154975891, "rewards/margins": 0.1907232701778412, "rewards/rejected": -0.4816528856754303, "step": 780 }, { "epoch": 0.83, "learning_rate": 4.264253609422038e-07, "logits/chosen": -2.7775344848632812, "logits/rejected": -2.7437610626220703, "logps/chosen": -391.9022521972656, "logps/rejected": -384.43426513671875, "loss": 0.6313, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2783506214618683, "rewards/margins": 0.23514008522033691, "rewards/rejected": -0.5134907960891724, "step": 790 }, { "epoch": 0.84, "learning_rate": 4.244044359204495e-07, "logits/chosen": -2.713089942932129, "logits/rejected": -2.6497960090637207, "logps/chosen": -433.2242126464844, "logps/rejected": -373.65130615234375, "loss": 0.6035, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.29666823148727417, "rewards/margins": 0.24842536449432373, "rewards/rejected": -0.5450935959815979, "step": 800 }, { "epoch": 0.84, "eval_logits/chosen": -2.7681221961975098, "eval_logits/rejected": -2.7056021690368652, "eval_logps/chosen": -381.1849060058594, "eval_logps/rejected": -364.1535949707031, "eval_loss": 0.6248704195022583, "eval_rewards/accuracies": 0.6845238208770752, "eval_rewards/chosen": -0.3614034950733185, "eval_rewards/margins": 0.2530852258205414, "eval_rewards/rejected": -0.6144886612892151, "eval_runtime": 249.0101, "eval_samples_per_second": 8.032, "eval_steps_per_second": 0.253, "step": 800 }, { "epoch": 0.85, "learning_rate": 4.223610613971753e-07, "logits/chosen": -2.7306289672851562, "logits/rejected": -2.6280109882354736, "logps/chosen": -378.93804931640625, "logps/rejected": -328.80938720703125, "loss": 0.6265, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.3830115795135498, "rewards/margins": 0.13664865493774414, "rewards/rejected": -0.519660234451294, "step": 810 }, { "epoch": 0.86, "learning_rate": 4.2029550039745396e-07, "logits/chosen": -2.674085855484009, "logits/rejected": -2.6277194023132324, "logps/chosen": -331.6683654785156, "logps/rejected": -327.3447570800781, "loss": 0.6375, "rewards/accuracies": 0.65625, "rewards/chosen": -0.3621932566165924, "rewards/margins": 0.2161780148744583, "rewards/rejected": -0.5783712267875671, "step": 820 }, { "epoch": 0.87, "learning_rate": 4.1820801880222236e-07, "logits/chosen": -2.6937668323516846, "logits/rejected": -2.678345203399658, "logps/chosen": -336.51519775390625, "logps/rejected": -338.04058837890625, "loss": 0.6178, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.372277170419693, "rewards/margins": 0.2381594181060791, "rewards/rejected": -0.6104366183280945, "step": 830 }, { "epoch": 0.88, "learning_rate": 4.160988853140567e-07, "logits/chosen": -2.68011212348938, "logits/rejected": -2.6486706733703613, "logps/chosen": -405.85235595703125, "logps/rejected": -377.17059326171875, "loss": 0.6225, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.44021159410476685, "rewards/margins": 0.13439835608005524, "rewards/rejected": -0.5746098756790161, "step": 840 }, { "epoch": 0.89, "learning_rate": 4.1396837142258507e-07, "logits/chosen": -2.757357597351074, "logits/rejected": -2.696622848510742, "logps/chosen": -403.8123779296875, "logps/rejected": -353.694091796875, "loss": 0.6194, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.355672687292099, "rewards/margins": 0.28402405977249146, "rewards/rejected": -0.6396967172622681, "step": 850 }, { "epoch": 0.9, "learning_rate": 4.1181675136954106e-07, "logits/chosen": -2.753262758255005, "logits/rejected": -2.715118646621704, "logps/chosen": -377.8106384277344, "logps/rejected": -364.499755859375, "loss": 0.6216, "rewards/accuracies": 0.625, "rewards/chosen": -0.4113299250602722, "rewards/margins": 0.2852802276611328, "rewards/rejected": -0.6966102123260498, "step": 860 }, { "epoch": 0.91, "learning_rate": 4.09644302113463e-07, "logits/chosen": -2.6933841705322266, "logits/rejected": -2.671048641204834, "logps/chosen": -337.9107666015625, "logps/rejected": -356.3653564453125, "loss": 0.6052, "rewards/accuracies": 0.71875, "rewards/chosen": -0.40935665369033813, "rewards/margins": 0.30245229601860046, "rewards/rejected": -0.7118089199066162, "step": 870 }, { "epoch": 0.92, "learning_rate": 4.0745130329404365e-07, "logits/chosen": -2.701093912124634, "logits/rejected": -2.63181471824646, "logps/chosen": -388.31011962890625, "logps/rejected": -363.6514587402344, "loss": 0.6198, "rewards/accuracies": 0.625, "rewards/chosen": -0.504043459892273, "rewards/margins": 0.22630378603935242, "rewards/rejected": -0.7303472757339478, "step": 880 }, { "epoch": 0.93, "learning_rate": 4.052380371961347e-07, "logits/chosen": -2.684615135192871, "logits/rejected": -2.652864933013916, "logps/chosen": -377.3784484863281, "logps/rejected": -379.3691711425781, "loss": 0.6286, "rewards/accuracies": 0.65625, "rewards/chosen": -0.5067042112350464, "rewards/margins": 0.17588753998279572, "rewards/rejected": -0.6825917363166809, "step": 890 }, { "epoch": 0.94, "learning_rate": 4.030047887134108e-07, "logits/chosen": -2.636793851852417, "logits/rejected": -2.5783472061157227, "logps/chosen": -407.3863220214844, "logps/rejected": -394.4078674316406, "loss": 0.6326, "rewards/accuracies": 0.625, "rewards/chosen": -0.48934751749038696, "rewards/margins": 0.17317138612270355, "rewards/rejected": -0.6625188589096069, "step": 900 }, { "epoch": 0.94, "eval_logits/chosen": -2.720741033554077, "eval_logits/rejected": -2.65683913230896, "eval_logps/chosen": -397.63446044921875, "eval_logps/rejected": -382.7857360839844, "eval_loss": 0.6203979253768921, "eval_rewards/accuracies": 0.6845238208770752, "eval_rewards/chosen": -0.5258990526199341, "eval_rewards/margins": 0.274911493062973, "eval_rewards/rejected": -0.8008105158805847, "eval_runtime": 244.5075, "eval_samples_per_second": 8.18, "eval_steps_per_second": 0.258, "step": 900 }, { "epoch": 0.95, "learning_rate": 4.007518453116979e-07, "logits/chosen": -2.6805593967437744, "logits/rejected": -2.6232123374938965, "logps/chosen": -357.9183044433594, "logps/rejected": -359.6230773925781, "loss": 0.6057, "rewards/accuracies": 0.65625, "rewards/chosen": -0.5844573974609375, "rewards/margins": 0.2230493128299713, "rewards/rejected": -0.8075065612792969, "step": 910 }, { "epoch": 0.96, "learning_rate": 3.984794969919702e-07, "logits/chosen": -2.6928341388702393, "logits/rejected": -2.6027913093566895, "logps/chosen": -404.1787414550781, "logps/rejected": -385.9433288574219, "loss": 0.6132, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.5911322832107544, "rewards/margins": 0.29310551285743713, "rewards/rejected": -0.8842377662658691, "step": 920 }, { "epoch": 0.97, "learning_rate": 3.96188036253021e-07, "logits/chosen": -2.7151689529418945, "logits/rejected": -2.6285009384155273, "logps/chosen": -379.22552490234375, "logps/rejected": -366.44720458984375, "loss": 0.5874, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.46869564056396484, "rewards/margins": 0.3491496443748474, "rewards/rejected": -0.8178452253341675, "step": 930 }, { "epoch": 0.98, "learning_rate": 3.938777580538119e-07, "logits/chosen": -2.695061683654785, "logits/rejected": -2.608922243118286, "logps/chosen": -434.6171875, "logps/rejected": -423.9627380371094, "loss": 0.6077, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.5053583383560181, "rewards/margins": 0.35829007625579834, "rewards/rejected": -0.8636484146118164, "step": 940 }, { "epoch": 0.99, "learning_rate": 3.9154895977550585e-07, "logits/chosen": -2.7094149589538574, "logits/rejected": -2.6375811100006104, "logps/chosen": -389.6069641113281, "logps/rejected": -375.0043029785156, "loss": 0.6176, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.4657767415046692, "rewards/margins": 0.2953377366065979, "rewards/rejected": -0.7611144185066223, "step": 950 }, { "epoch": 1.0, "learning_rate": 3.8920194118318725e-07, "logits/chosen": -2.649557590484619, "logits/rejected": -2.591104030609131, "logps/chosen": -365.44061279296875, "logps/rejected": -376.2007141113281, "loss": 0.6041, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3993779420852661, "rewards/margins": 0.3410153388977051, "rewards/rejected": -0.740393340587616, "step": 960 }, { "epoch": 1.02, "learning_rate": 3.868370043872768e-07, "logits/chosen": -2.683077812194824, "logits/rejected": -2.6237359046936035, "logps/chosen": -416.84088134765625, "logps/rejected": -408.75482177734375, "loss": 0.5978, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.43280115723609924, "rewards/margins": 0.34043940901756287, "rewards/rejected": -0.7732406258583069, "step": 970 }, { "epoch": 1.03, "learning_rate": 3.844544538046425e-07, "logits/chosen": -2.617851972579956, "logits/rejected": -2.592369556427002, "logps/chosen": -354.9996032714844, "logps/rejected": -388.5142822265625, "loss": 0.5878, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.4673759341239929, "rewards/margins": 0.39914676547050476, "rewards/rejected": -0.8665226101875305, "step": 980 }, { "epoch": 1.04, "learning_rate": 3.8205459611941577e-07, "logits/chosen": -2.669250011444092, "logits/rejected": -2.602257490158081, "logps/chosen": -417.526611328125, "logps/rejected": -391.24456787109375, "loss": 0.6071, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.4427572190761566, "rewards/margins": 0.2923399806022644, "rewards/rejected": -0.7350972890853882, "step": 990 }, { "epoch": 1.05, "learning_rate": 3.7963774024351423e-07, "logits/chosen": -2.6895923614501953, "logits/rejected": -2.6821107864379883, "logps/chosen": -368.69537353515625, "logps/rejected": -377.12225341796875, "loss": 0.6103, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.48284831643104553, "rewards/margins": 0.22640752792358398, "rewards/rejected": -0.7092558145523071, "step": 1000 }, { "epoch": 1.05, "eval_logits/chosen": -2.6968600749969482, "eval_logits/rejected": -2.6322262287139893, "eval_logps/chosen": -396.68231201171875, "eval_logps/rejected": -384.4855651855469, "eval_loss": 0.6144962310791016, "eval_rewards/accuracies": 0.6944444179534912, "eval_rewards/chosen": -0.5163776874542236, "eval_rewards/margins": 0.30143067240715027, "eval_rewards/rejected": -0.8178083896636963, "eval_runtime": 246.7178, "eval_samples_per_second": 8.106, "eval_steps_per_second": 0.255, "step": 1000 }, { "epoch": 1.06, "learning_rate": 3.7720419727687865e-07, "logits/chosen": -2.6810877323150635, "logits/rejected": -2.6230504512786865, "logps/chosen": -413.1744689941406, "logps/rejected": -382.6622314453125, "loss": 0.5989, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5526056289672852, "rewards/margins": 0.2373414933681488, "rewards/rejected": -0.7899471521377563, "step": 1010 }, { "epoch": 1.07, "learning_rate": 3.747542804674274e-07, "logits/chosen": -2.661088228225708, "logits/rejected": -2.6390786170959473, "logps/chosen": -398.89813232421875, "logps/rejected": -401.23431396484375, "loss": 0.6005, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.46416956186294556, "rewards/margins": 0.30864232778549194, "rewards/rejected": -0.7728118300437927, "step": 1020 }, { "epoch": 1.08, "learning_rate": 3.7228830517073527e-07, "logits/chosen": -2.622685670852661, "logits/rejected": -2.5972726345062256, "logps/chosen": -360.4078063964844, "logps/rejected": -360.20269775390625, "loss": 0.5902, "rewards/accuracies": 0.75, "rewards/chosen": -0.3580246567726135, "rewards/margins": 0.3438374400138855, "rewards/rejected": -0.7018621563911438, "step": 1030 }, { "epoch": 1.09, "learning_rate": 3.698065888094405e-07, "logits/chosen": -2.6089298725128174, "logits/rejected": -2.6035428047180176, "logps/chosen": -365.2914123535156, "logps/rejected": -406.7452087402344, "loss": 0.6074, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.43625983595848083, "rewards/margins": 0.285591185092926, "rewards/rejected": -0.7218509912490845, "step": 1040 }, { "epoch": 1.1, "learning_rate": 3.6730945083238594e-07, "logits/chosen": -2.6788554191589355, "logits/rejected": -2.60333251953125, "logps/chosen": -396.67388916015625, "logps/rejected": -369.4822998046875, "loss": 0.5952, "rewards/accuracies": 0.6875, "rewards/chosen": -0.4486874043941498, "rewards/margins": 0.3007965087890625, "rewards/rejected": -0.7494838833808899, "step": 1050 }, { "epoch": 1.11, "learning_rate": 3.64797212673499e-07, "logits/chosen": -2.663841962814331, "logits/rejected": -2.530757427215576, "logps/chosen": -434.9602966308594, "logps/rejected": -395.90704345703125, "loss": 0.5741, "rewards/accuracies": 0.71875, "rewards/chosen": -0.434063196182251, "rewards/margins": 0.4063114523887634, "rewards/rejected": -0.840374767780304, "step": 1060 }, { "epoch": 1.12, "learning_rate": 3.6227019771041664e-07, "logits/chosen": -2.631474018096924, "logits/rejected": -2.5514721870422363, "logps/chosen": -367.2853698730469, "logps/rejected": -334.1014709472656, "loss": 0.6031, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.45699542760849, "rewards/margins": 0.30584800243377686, "rewards/rejected": -0.7628434896469116, "step": 1070 }, { "epoch": 1.13, "learning_rate": 3.5972873122285994e-07, "logits/chosen": -2.6002821922302246, "logits/rejected": -2.5638835430145264, "logps/chosen": -362.3927307128906, "logps/rejected": -384.8812561035156, "loss": 0.5971, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5179690718650818, "rewards/margins": 0.29387664794921875, "rewards/rejected": -0.8118457794189453, "step": 1080 }, { "epoch": 1.14, "learning_rate": 3.571731403507635e-07, "logits/chosen": -2.6326613426208496, "logits/rejected": -2.5637834072113037, "logps/chosen": -416.04046630859375, "logps/rejected": -411.080810546875, "loss": 0.5785, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.5498114228248596, "rewards/margins": 0.36757707595825195, "rewards/rejected": -0.9173885583877563, "step": 1090 }, { "epoch": 1.15, "learning_rate": 3.5460375405216603e-07, "logits/chosen": -2.665194034576416, "logits/rejected": -2.601635456085205, "logps/chosen": -397.65960693359375, "logps/rejected": -401.44976806640625, "loss": 0.6002, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6755828261375427, "rewards/margins": 0.26671653985977173, "rewards/rejected": -0.9422993659973145, "step": 1100 }, { "epoch": 1.15, "eval_logits/chosen": -2.668804883956909, "eval_logits/rejected": -2.6023752689361572, "eval_logps/chosen": -396.83331298828125, "eval_logps/rejected": -385.9577941894531, "eval_loss": 0.6116264462471008, "eval_rewards/accuracies": 0.692460298538208, "eval_rewards/chosen": -0.5178873538970947, "eval_rewards/margins": 0.31464365124702454, "eval_rewards/rejected": -0.8325309753417969, "eval_runtime": 250.5629, "eval_samples_per_second": 7.982, "eval_steps_per_second": 0.251, "step": 1100 }, { "epoch": 1.16, "learning_rate": 3.520209030608662e-07, "logits/chosen": -2.6696746349334717, "logits/rejected": -2.6097488403320312, "logps/chosen": -393.92529296875, "logps/rejected": -380.6165466308594, "loss": 0.5968, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.47735723853111267, "rewards/margins": 0.25016671419143677, "rewards/rejected": -0.727523922920227, "step": 1110 }, { "epoch": 1.17, "learning_rate": 3.4942491984385066e-07, "logits/chosen": -2.666564464569092, "logits/rejected": -2.6042842864990234, "logps/chosen": -403.80145263671875, "logps/rejected": -381.906982421875, "loss": 0.5944, "rewards/accuracies": 0.71875, "rewards/chosen": -0.4305171072483063, "rewards/margins": 0.3264329731464386, "rewards/rejected": -0.7569500207901001, "step": 1120 }, { "epoch": 1.18, "learning_rate": 3.468161385584982e-07, "logits/chosen": -2.6324963569641113, "logits/rejected": -2.5880188941955566, "logps/chosen": -381.2325744628906, "logps/rejected": -379.158203125, "loss": 0.5799, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3703802824020386, "rewards/margins": 0.34355229139328003, "rewards/rejected": -0.7139325141906738, "step": 1130 }, { "epoch": 1.19, "learning_rate": 3.441948950095672e-07, "logits/chosen": -2.6656455993652344, "logits/rejected": -2.5824806690216064, "logps/chosen": -407.565673828125, "logps/rejected": -368.3275451660156, "loss": 0.5875, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.43670734763145447, "rewards/margins": 0.37962377071380615, "rewards/rejected": -0.8163310885429382, "step": 1140 }, { "epoch": 1.2, "learning_rate": 3.4156152660596994e-07, "logits/chosen": -2.6464781761169434, "logits/rejected": -2.5937983989715576, "logps/chosen": -402.5704345703125, "logps/rejected": -392.44952392578125, "loss": 0.5997, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5296522974967957, "rewards/margins": 0.3139727711677551, "rewards/rejected": -0.8436250686645508, "step": 1150 }, { "epoch": 1.21, "learning_rate": 3.3891637231734125e-07, "logits/chosen": -2.6544508934020996, "logits/rejected": -2.595885753631592, "logps/chosen": -386.92401123046875, "logps/rejected": -395.77642822265625, "loss": 0.5905, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.44389739632606506, "rewards/margins": 0.35769903659820557, "rewards/rejected": -0.801596462726593, "step": 1160 }, { "epoch": 1.22, "learning_rate": 3.3625977263040643e-07, "logits/chosen": -2.666747570037842, "logits/rejected": -2.5960164070129395, "logps/chosen": -414.5166015625, "logps/rejected": -367.41424560546875, "loss": 0.5867, "rewards/accuracies": 0.65625, "rewards/chosen": -0.4271417260169983, "rewards/margins": 0.2684404253959656, "rewards/rejected": -0.6955822110176086, "step": 1170 }, { "epoch": 1.23, "learning_rate": 3.3359206950515266e-07, "logits/chosen": -2.6162502765655518, "logits/rejected": -2.5295591354370117, "logps/chosen": -400.8308410644531, "logps/rejected": -366.78558349609375, "loss": 0.5793, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5077975988388062, "rewards/margins": 0.36180368065834045, "rewards/rejected": -0.8696013689041138, "step": 1180 }, { "epoch": 1.25, "learning_rate": 3.3091360633081236e-07, "logits/chosen": -2.687870502471924, "logits/rejected": -2.6193759441375732, "logps/chosen": -402.45098876953125, "logps/rejected": -392.7806701660156, "loss": 0.5752, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.5487203598022461, "rewards/margins": 0.37653276324272156, "rewards/rejected": -0.9252530932426453, "step": 1190 }, { "epoch": 1.26, "learning_rate": 3.2822472788166146e-07, "logits/chosen": -2.6522316932678223, "logits/rejected": -2.5871059894561768, "logps/chosen": -410.39764404296875, "logps/rejected": -405.1246643066406, "loss": 0.5729, "rewards/accuracies": 0.71875, "rewards/chosen": -0.47655636072158813, "rewards/margins": 0.35264235734939575, "rewards/rejected": -0.8291987180709839, "step": 1200 }, { "epoch": 1.26, "eval_logits/chosen": -2.6376395225524902, "eval_logits/rejected": -2.570849657058716, "eval_logps/chosen": -403.4270935058594, "eval_logps/rejected": -394.70733642578125, "eval_loss": 0.6083069443702698, "eval_rewards/accuracies": 0.704365074634552, "eval_rewards/chosen": -0.5838249325752258, "eval_rewards/margins": 0.33620160818099976, "eval_rewards/rejected": -0.9200265407562256, "eval_runtime": 246.1597, "eval_samples_per_second": 8.125, "eval_steps_per_second": 0.256, "step": 1200 }, { "epoch": 1.27, "learning_rate": 3.2552578027263955e-07, "logits/chosen": -2.623401165008545, "logits/rejected": -2.5443992614746094, "logps/chosen": -361.82171630859375, "logps/rejected": -374.11334228515625, "loss": 0.5843, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5367065668106079, "rewards/margins": 0.31009745597839355, "rewards/rejected": -0.8468039631843567, "step": 1210 }, { "epoch": 1.28, "learning_rate": 3.228171109147982e-07, "logits/chosen": -2.643584728240967, "logits/rejected": -2.5270514488220215, "logps/chosen": -410.5106506347656, "logps/rejected": -380.1089172363281, "loss": 0.5983, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.44848957657814026, "rewards/margins": 0.35878002643585205, "rewards/rejected": -0.8072696924209595, "step": 1220 }, { "epoch": 1.29, "learning_rate": 3.2009906847058125e-07, "logits/chosen": -2.639606475830078, "logits/rejected": -2.5487468242645264, "logps/chosen": -424.10595703125, "logps/rejected": -401.09674072265625, "loss": 0.587, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.49688854813575745, "rewards/margins": 0.3513595461845398, "rewards/rejected": -0.8482481241226196, "step": 1230 }, { "epoch": 1.3, "learning_rate": 3.1737200280894516e-07, "logits/chosen": -2.5839312076568604, "logits/rejected": -2.5728745460510254, "logps/chosen": -371.0592041015625, "logps/rejected": -380.7933349609375, "loss": 0.602, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.523228645324707, "rewards/margins": 0.30655089020729065, "rewards/rejected": -0.8297795057296753, "step": 1240 }, { "epoch": 1.31, "learning_rate": 3.146362649603233e-07, "logits/chosen": -2.596813917160034, "logits/rejected": -2.546060562133789, "logps/chosen": -406.51953125, "logps/rejected": -392.93890380859375, "loss": 0.5825, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.5865792036056519, "rewards/margins": 0.3280216157436371, "rewards/rejected": -0.9146007299423218, "step": 1250 }, { "epoch": 1.32, "learning_rate": 3.118922070714408e-07, "logits/chosen": -2.569214105606079, "logits/rejected": -2.54783034324646, "logps/chosen": -347.00006103515625, "logps/rejected": -362.70562744140625, "loss": 0.602, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5019794702529907, "rewards/margins": 0.271824449300766, "rewards/rejected": -0.7738040089607239, "step": 1260 }, { "epoch": 1.33, "learning_rate": 3.091401823599865e-07, "logits/chosen": -2.594982624053955, "logits/rejected": -2.5411128997802734, "logps/chosen": -371.3302917480469, "logps/rejected": -372.2278747558594, "loss": 0.5734, "rewards/accuracies": 0.75, "rewards/chosen": -0.3963788151741028, "rewards/margins": 0.3730164170265198, "rewards/rejected": -0.7693952322006226, "step": 1270 }, { "epoch": 1.34, "learning_rate": 3.063805450691458e-07, "logits/chosen": -2.655991315841675, "logits/rejected": -2.56858491897583, "logps/chosen": -389.52099609375, "logps/rejected": -369.41400146484375, "loss": 0.577, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.39786046743392944, "rewards/margins": 0.3743780255317688, "rewards/rejected": -0.7722384929656982, "step": 1280 }, { "epoch": 1.35, "learning_rate": 3.036136504220025e-07, "logits/chosen": -2.6017796993255615, "logits/rejected": -2.544804096221924, "logps/chosen": -393.8155517578125, "logps/rejected": -385.86541748046875, "loss": 0.5892, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.46426922082901, "rewards/margins": 0.35931476950645447, "rewards/rejected": -0.8235839605331421, "step": 1290 }, { "epoch": 1.36, "learning_rate": 3.0083985457581415e-07, "logits/chosen": -2.5615665912628174, "logits/rejected": -2.544076681137085, "logps/chosen": -372.3790588378906, "logps/rejected": -374.0905456542969, "loss": 0.599, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.5213760137557983, "rewards/margins": 0.2982157766819, "rewards/rejected": -0.8195918202400208, "step": 1300 }, { "epoch": 1.36, "eval_logits/chosen": -2.613380193710327, "eval_logits/rejected": -2.545426368713379, "eval_logps/chosen": -397.10211181640625, "eval_logps/rejected": -387.2309875488281, "eval_loss": 0.6077432632446289, "eval_rewards/accuracies": 0.7103174328804016, "eval_rewards/chosen": -0.5205760598182678, "eval_rewards/margins": 0.32468709349632263, "eval_rewards/rejected": -0.8452631831169128, "eval_runtime": 247.7632, "eval_samples_per_second": 8.072, "eval_steps_per_second": 0.254, "step": 1300 }, { "epoch": 1.37, "learning_rate": 2.9805951457616684e-07, "logits/chosen": -2.5732953548431396, "logits/rejected": -2.505056381225586, "logps/chosen": -370.58367919921875, "logps/rejected": -377.2376708984375, "loss": 0.5948, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.4765905737876892, "rewards/margins": 0.30376702547073364, "rewards/rejected": -0.7803575396537781, "step": 1310 }, { "epoch": 1.38, "learning_rate": 2.952729883110164e-07, "logits/chosen": -2.560969829559326, "logits/rejected": -2.4919774532318115, "logps/chosen": -375.130859375, "logps/rejected": -401.1944580078125, "loss": 0.582, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5600558519363403, "rewards/margins": 0.3885023593902588, "rewards/rejected": -0.9485582113265991, "step": 1320 }, { "epoch": 1.39, "learning_rate": 2.924806344646205e-07, "logits/chosen": -2.5645477771759033, "logits/rejected": -2.5073866844177246, "logps/chosen": -408.23126220703125, "logps/rejected": -424.2571716308594, "loss": 0.5979, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6115280389785767, "rewards/margins": 0.380671888589859, "rewards/rejected": -0.9921998977661133, "step": 1330 }, { "epoch": 1.4, "learning_rate": 2.896828124713684e-07, "logits/chosen": -2.549287796020508, "logits/rejected": -2.5001778602600098, "logps/chosen": -386.74102783203125, "logps/rejected": -394.2340393066406, "loss": 0.6196, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6311862468719482, "rewards/margins": 0.3289794921875, "rewards/rejected": -0.960165798664093, "step": 1340 }, { "epoch": 1.41, "learning_rate": 2.8687988246951437e-07, "logits/chosen": -2.5453922748565674, "logits/rejected": -2.518723726272583, "logps/chosen": -342.259033203125, "logps/rejected": -360.65850830078125, "loss": 0.5773, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.47795337438583374, "rewards/margins": 0.33936068415641785, "rewards/rejected": -0.8173141479492188, "step": 1350 }, { "epoch": 1.42, "learning_rate": 2.8407220525482047e-07, "logits/chosen": -2.5927088260650635, "logits/rejected": -2.489152431488037, "logps/chosen": -413.27105712890625, "logps/rejected": -376.00689697265625, "loss": 0.5965, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4944841265678406, "rewards/margins": 0.2662241756916046, "rewards/rejected": -0.7607083916664124, "step": 1360 }, { "epoch": 1.43, "learning_rate": 2.81260142234114e-07, "logits/chosen": -2.6516880989074707, "logits/rejected": -2.5429511070251465, "logps/chosen": -403.22454833984375, "logps/rejected": -371.4908142089844, "loss": 0.5748, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.46684926748275757, "rewards/margins": 0.3777723014354706, "rewards/rejected": -0.8446215391159058, "step": 1370 }, { "epoch": 1.44, "learning_rate": 2.7844405537876766e-07, "logits/chosen": -2.5847606658935547, "logits/rejected": -2.5084991455078125, "logps/chosen": -383.04254150390625, "logps/rejected": -428.8330078125, "loss": 0.5614, "rewards/accuracies": 0.75, "rewards/chosen": -0.47037452459335327, "rewards/margins": 0.42992621660232544, "rewards/rejected": -0.9003008008003235, "step": 1380 }, { "epoch": 1.45, "learning_rate": 2.7562430717810586e-07, "logits/chosen": -2.5550408363342285, "logits/rejected": -2.54259991645813, "logps/chosen": -390.04278564453125, "logps/rejected": -383.87872314453125, "loss": 0.5857, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.5229761004447937, "rewards/margins": 0.38916486501693726, "rewards/rejected": -0.912140965461731, "step": 1390 }, { "epoch": 1.47, "learning_rate": 2.728012605927447e-07, "logits/chosen": -2.605335235595703, "logits/rejected": -2.5029044151306152, "logps/chosen": -421.9798889160156, "logps/rejected": -388.80914306640625, "loss": 0.5821, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.57950758934021, "rewards/margins": 0.4323285222053528, "rewards/rejected": -1.011836290359497, "step": 1400 }, { "epoch": 1.47, "eval_logits/chosen": -2.5899722576141357, "eval_logits/rejected": -2.5210795402526855, "eval_logps/chosen": -404.44964599609375, "eval_logps/rejected": -398.31060791015625, "eval_loss": 0.602461040019989, "eval_rewards/accuracies": 0.7063491940498352, "eval_rewards/chosen": -0.594050943851471, "eval_rewards/margins": 0.36200812458992004, "eval_rewards/rejected": -0.9560590386390686, "eval_runtime": 240.2615, "eval_samples_per_second": 8.324, "eval_steps_per_second": 0.262, "step": 1400 }, { "epoch": 1.48, "learning_rate": 2.699752790078714e-07, "logits/chosen": -2.576601982116699, "logits/rejected": -2.484036922454834, "logps/chosen": -422.91094970703125, "logps/rejected": -394.78570556640625, "loss": 0.578, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5908924341201782, "rewards/margins": 0.4301966726779938, "rewards/rejected": -1.0210891962051392, "step": 1410 }, { "epoch": 1.49, "learning_rate": 2.6714672618646916e-07, "logits/chosen": -2.550550699234009, "logits/rejected": -2.5153775215148926, "logps/chosen": -406.955322265625, "logps/rejected": -409.8970642089844, "loss": 0.5851, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.5765306949615479, "rewards/margins": 0.34898003935813904, "rewards/rejected": -0.9255107045173645, "step": 1420 }, { "epoch": 1.5, "learning_rate": 2.643159662224931e-07, "logits/chosen": -2.510113477706909, "logits/rejected": -2.4473376274108887, "logps/chosen": -401.7076110839844, "logps/rejected": -385.7937927246094, "loss": 0.5892, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.5977416634559631, "rewards/margins": 0.2979085147380829, "rewards/rejected": -0.8956502079963684, "step": 1430 }, { "epoch": 1.51, "learning_rate": 2.6148336349400386e-07, "logits/chosen": -2.581653118133545, "logits/rejected": -2.4795095920562744, "logps/chosen": -414.6515197753906, "logps/rejected": -404.5352783203125, "loss": 0.5781, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.4708114564418793, "rewards/margins": 0.42272576689720154, "rewards/rejected": -0.893537163734436, "step": 1440 }, { "epoch": 1.52, "learning_rate": 2.5864928261626416e-07, "logits/chosen": -2.5162148475646973, "logits/rejected": -2.454847812652588, "logps/chosen": -392.3681945800781, "logps/rejected": -386.84161376953125, "loss": 0.5779, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5253309011459351, "rewards/margins": 0.3526184856891632, "rewards/rejected": -0.8779493570327759, "step": 1450 }, { "epoch": 1.53, "learning_rate": 2.558140883948058e-07, "logits/chosen": -2.554378032684326, "logits/rejected": -2.4660162925720215, "logps/chosen": -411.94757080078125, "logps/rejected": -378.79522705078125, "loss": 0.5712, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.5396718978881836, "rewards/margins": 0.370322585105896, "rewards/rejected": -0.9099944829940796, "step": 1460 }, { "epoch": 1.54, "learning_rate": 2.5297814577847116e-07, "logits/chosen": -2.5671238899230957, "logits/rejected": -2.525606155395508, "logps/chosen": -404.15545654296875, "logps/rejected": -415.3843688964844, "loss": 0.5932, "rewards/accuracies": 0.71875, "rewards/chosen": -0.5830854177474976, "rewards/margins": 0.32624879479408264, "rewards/rejected": -0.9093341827392578, "step": 1470 }, { "epoch": 1.55, "learning_rate": 2.501418198124365e-07, "logits/chosen": -2.56373929977417, "logits/rejected": -2.491987705230713, "logps/chosen": -436.6615295410156, "logps/rejected": -413.1544494628906, "loss": 0.5685, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5825528502464294, "rewards/margins": 0.4194890856742859, "rewards/rejected": -1.0020420551300049, "step": 1480 }, { "epoch": 1.56, "learning_rate": 2.473054755912234e-07, "logits/chosen": -2.5011954307556152, "logits/rejected": -2.4363338947296143, "logps/chosen": -406.9830017089844, "logps/rejected": -418.9451599121094, "loss": 0.5797, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.5498841404914856, "rewards/margins": 0.49638238549232483, "rewards/rejected": -1.0462663173675537, "step": 1490 }, { "epoch": 1.57, "learning_rate": 2.444694782117033e-07, "logits/chosen": -2.5000977516174316, "logits/rejected": -2.443779468536377, "logps/chosen": -392.17327880859375, "logps/rejected": -432.06195068359375, "loss": 0.574, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.6436781287193298, "rewards/margins": 0.43835169076919556, "rewards/rejected": -1.082029938697815, "step": 1500 }, { "epoch": 1.57, "eval_logits/chosen": -2.5592637062072754, "eval_logits/rejected": -2.488711357116699, "eval_logps/chosen": -411.21783447265625, "eval_logps/rejected": -407.4162292480469, "eval_loss": 0.5976974964141846, "eval_rewards/accuracies": 0.7142857313156128, "eval_rewards/chosen": -0.6617329716682434, "eval_rewards/margins": 0.3853819668292999, "eval_rewards/rejected": -1.0471149682998657, "eval_runtime": 249.6762, "eval_samples_per_second": 8.01, "eval_steps_per_second": 0.252, "step": 1500 }, { "epoch": 1.58, "learning_rate": 2.416341927261016e-07, "logits/chosen": -2.509632110595703, "logits/rejected": -2.4712796211242676, "logps/chosen": -388.2112121582031, "logps/rejected": -393.1347961425781, "loss": 0.5739, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6419853568077087, "rewards/margins": 0.44171229004859924, "rewards/rejected": -1.0836976766586304, "step": 1510 }, { "epoch": 1.59, "learning_rate": 2.3879998409500845e-07, "logits/chosen": -2.5448498725891113, "logits/rejected": -2.4746804237365723, "logps/chosen": -435.6138610839844, "logps/rejected": -420.2179260253906, "loss": 0.5764, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.7542072534561157, "rewards/margins": 0.4248180389404297, "rewards/rejected": -1.1790252923965454, "step": 1520 }, { "epoch": 1.6, "learning_rate": 2.3596721714039998e-07, "logits/chosen": -2.4864585399627686, "logits/rejected": -2.4267334938049316, "logps/chosen": -395.937744140625, "logps/rejected": -394.3992919921875, "loss": 0.5586, "rewards/accuracies": 0.71875, "rewards/chosen": -0.689933180809021, "rewards/margins": 0.41859620809555054, "rewards/rejected": -1.1085295677185059, "step": 1530 }, { "epoch": 1.61, "learning_rate": 2.3313625649867824e-07, "logits/chosen": -2.5655341148376465, "logits/rejected": -2.4968507289886475, "logps/chosen": -454.5184631347656, "logps/rejected": -442.9791564941406, "loss": 0.5844, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.721511960029602, "rewards/margins": 0.4738715589046478, "rewards/rejected": -1.1953833103179932, "step": 1540 }, { "epoch": 1.62, "learning_rate": 2.303074665737355e-07, "logits/chosen": -2.503943920135498, "logits/rejected": -2.450364112854004, "logps/chosen": -424.10760498046875, "logps/rejected": -405.66070556640625, "loss": 0.5654, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.673719048500061, "rewards/margins": 0.3581104576587677, "rewards/rejected": -1.0318294763565063, "step": 1550 }, { "epoch": 1.63, "learning_rate": 2.274812114900469e-07, "logits/chosen": -2.505079507827759, "logits/rejected": -2.4630515575408936, "logps/chosen": -368.4876403808594, "logps/rejected": -392.97467041015625, "loss": 0.585, "rewards/accuracies": 0.65625, "rewards/chosen": -0.6174124479293823, "rewards/margins": 0.3502601981163025, "rewards/rejected": -0.9676725268363953, "step": 1560 }, { "epoch": 1.64, "learning_rate": 2.2465785504580074e-07, "logits/chosen": -2.4787967205047607, "logits/rejected": -2.406154155731201, "logps/chosen": -432.4744567871094, "logps/rejected": -413.21453857421875, "loss": 0.5799, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.658542811870575, "rewards/margins": 0.3376074433326721, "rewards/rejected": -0.9961503744125366, "step": 1570 }, { "epoch": 1.65, "learning_rate": 2.2183776066606947e-07, "logits/chosen": -2.5223982334136963, "logits/rejected": -2.51818585395813, "logps/chosen": -380.69573974609375, "logps/rejected": -432.8943786621094, "loss": 0.5772, "rewards/accuracies": 0.71875, "rewards/chosen": -0.5959492921829224, "rewards/margins": 0.47599101066589355, "rewards/rejected": -1.0719401836395264, "step": 1580 }, { "epoch": 1.66, "learning_rate": 2.190212913560298e-07, "logits/chosen": -2.4563565254211426, "logits/rejected": -2.4220333099365234, "logps/chosen": -383.9895324707031, "logps/rejected": -395.3768005371094, "loss": 0.5726, "rewards/accuracies": 0.71875, "rewards/chosen": -0.6407332420349121, "rewards/margins": 0.4355078339576721, "rewards/rejected": -1.076241135597229, "step": 1590 }, { "epoch": 1.67, "learning_rate": 2.1620880965423596e-07, "logits/chosen": -2.488713502883911, "logits/rejected": -2.434023380279541, "logps/chosen": -390.2538757324219, "logps/rejected": -380.91302490234375, "loss": 0.5716, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.6403465867042542, "rewards/margins": 0.4435759484767914, "rewards/rejected": -1.0839226245880127, "step": 1600 }, { "epoch": 1.67, "eval_logits/chosen": -2.536853313446045, "eval_logits/rejected": -2.4650635719299316, "eval_logps/chosen": -412.695556640625, "eval_logps/rejected": -411.40203857421875, "eval_loss": 0.5954813361167908, "eval_rewards/accuracies": 0.72817462682724, "eval_rewards/chosen": -0.6765100955963135, "eval_rewards/margins": 0.41046345233917236, "eval_rewards/rejected": -1.0869736671447754, "eval_runtime": 247.9314, "eval_samples_per_second": 8.067, "eval_steps_per_second": 0.254, "step": 1600 }, { "epoch": 1.68, "learning_rate": 2.134006775859537e-07, "logits/chosen": -2.4386448860168457, "logits/rejected": -2.4446792602539062, "logps/chosen": -400.6998596191406, "logps/rejected": -428.584716796875, "loss": 0.5628, "rewards/accuracies": 0.75, "rewards/chosen": -0.7244473695755005, "rewards/margins": 0.43817657232284546, "rewards/rejected": -1.1626240015029907, "step": 1610 }, { "epoch": 1.7, "learning_rate": 2.1059725661655948e-07, "logits/chosen": -2.543206214904785, "logits/rejected": -2.481884002685547, "logps/chosen": -437.24176025390625, "logps/rejected": -411.68115234375, "loss": 0.5581, "rewards/accuracies": 0.71875, "rewards/chosen": -0.815970778465271, "rewards/margins": 0.4566773474216461, "rewards/rejected": -1.2726482152938843, "step": 1620 }, { "epoch": 1.71, "learning_rate": 2.077989076050133e-07, "logits/chosen": -2.4628424644470215, "logits/rejected": -2.414001703262329, "logps/chosen": -437.2959899902344, "logps/rejected": -445.44427490234375, "loss": 0.5536, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.874140739440918, "rewards/margins": 0.4144424498081207, "rewards/rejected": -1.2885833978652954, "step": 1630 }, { "epoch": 1.72, "learning_rate": 2.050059907574076e-07, "logits/chosen": -2.507554054260254, "logits/rejected": -2.427834987640381, "logps/chosen": -408.7767028808594, "logps/rejected": -413.369873046875, "loss": 0.5731, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.7224391102790833, "rewards/margins": 0.4979207515716553, "rewards/rejected": -1.2203600406646729, "step": 1640 }, { "epoch": 1.73, "learning_rate": 2.022188655806016e-07, "logits/chosen": -2.523545503616333, "logits/rejected": -2.448479413986206, "logps/chosen": -451.228515625, "logps/rejected": -419.6627502441406, "loss": 0.5857, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.7539777755737305, "rewards/margins": 0.37716880440711975, "rewards/rejected": -1.1311466693878174, "step": 1650 }, { "epoch": 1.74, "learning_rate": 1.9943789083594564e-07, "logits/chosen": -2.4754841327667236, "logits/rejected": -2.428614377975464, "logps/chosen": -385.0950622558594, "logps/rejected": -403.7740783691406, "loss": 0.5752, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7256184816360474, "rewards/margins": 0.4796815812587738, "rewards/rejected": -1.2053000926971436, "step": 1660 }, { "epoch": 1.75, "learning_rate": 1.9666342449310025e-07, "logits/chosen": -2.530557870864868, "logits/rejected": -2.4692397117614746, "logps/chosen": -409.13922119140625, "logps/rejected": -402.8389892578125, "loss": 0.5764, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6921072602272034, "rewards/margins": 0.4285035729408264, "rewards/rejected": -1.1206107139587402, "step": 1670 }, { "epoch": 1.76, "learning_rate": 1.938958236839588e-07, "logits/chosen": -2.52577543258667, "logits/rejected": -2.4166135787963867, "logps/chosen": -440.7547302246094, "logps/rejected": -413.27911376953125, "loss": 0.5711, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.7067890167236328, "rewards/margins": 0.4192212224006653, "rewards/rejected": -1.1260101795196533, "step": 1680 }, { "epoch": 1.77, "learning_rate": 1.9113544465667637e-07, "logits/chosen": -2.46167254447937, "logits/rejected": -2.4476611614227295, "logps/chosen": -376.3783264160156, "logps/rejected": -403.9447326660156, "loss": 0.5749, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.6462227702140808, "rewards/margins": 0.4927960932254791, "rewards/rejected": -1.1390188932418823, "step": 1690 }, { "epoch": 1.78, "learning_rate": 1.88382642729814e-07, "logits/chosen": -2.5014305114746094, "logits/rejected": -2.4548792839050293, "logps/chosen": -411.25457763671875, "logps/rejected": -420.2723693847656, "loss": 0.5477, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6595852971076965, "rewards/margins": 0.5647996068000793, "rewards/rejected": -1.2243849039077759, "step": 1700 }, { "epoch": 1.78, "eval_logits/chosen": -2.5078556537628174, "eval_logits/rejected": -2.4342150688171387, "eval_logps/chosen": -425.2423095703125, "eval_logps/rejected": -427.0003356933594, "eval_loss": 0.5904152393341064, "eval_rewards/accuracies": 0.7321428656578064, "eval_rewards/chosen": -0.8019776940345764, "eval_rewards/margins": 0.44097864627838135, "eval_rewards/rejected": -1.242956280708313, "eval_runtime": 248.3578, "eval_samples_per_second": 8.053, "eval_steps_per_second": 0.254, "step": 1700 }, { "epoch": 1.79, "learning_rate": 1.856377722466009e-07, "logits/chosen": -2.4711380004882812, "logits/rejected": -2.408111810684204, "logps/chosen": -441.938720703125, "logps/rejected": -463.12432861328125, "loss": 0.5691, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7691561579704285, "rewards/margins": 0.46461838483810425, "rewards/rejected": -1.2337746620178223, "step": 1710 }, { "epoch": 1.8, "learning_rate": 1.8290118652932364e-07, "logits/chosen": -2.4911022186279297, "logits/rejected": -2.45578670501709, "logps/chosen": -388.4582214355469, "logps/rejected": -418.9519958496094, "loss": 0.5829, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7712088823318481, "rewards/margins": 0.48189014196395874, "rewards/rejected": -1.2530990839004517, "step": 1720 }, { "epoch": 1.81, "learning_rate": 1.8017323783384601e-07, "logits/chosen": -2.524731397628784, "logits/rejected": -2.4751369953155518, "logps/chosen": -411.6083984375, "logps/rejected": -451.7425231933594, "loss": 0.5679, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6576933860778809, "rewards/margins": 0.4532436430454254, "rewards/rejected": -1.1109369993209839, "step": 1730 }, { "epoch": 1.82, "learning_rate": 1.7745427730426635e-07, "logits/chosen": -2.5422208309173584, "logits/rejected": -2.4694812297821045, "logps/chosen": -394.8658447265625, "logps/rejected": -415.96124267578125, "loss": 0.5595, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.6860970258712769, "rewards/margins": 0.5364459753036499, "rewards/rejected": -1.2225428819656372, "step": 1740 }, { "epoch": 1.83, "learning_rate": 1.7474465492771772e-07, "logits/chosen": -2.4822356700897217, "logits/rejected": -2.392840623855591, "logps/chosen": -445.530029296875, "logps/rejected": -415.55078125, "loss": 0.5643, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.7760051488876343, "rewards/margins": 0.48258695006370544, "rewards/rejected": -1.258592128753662, "step": 1750 }, { "epoch": 1.84, "learning_rate": 1.7204471948931758e-07, "logits/chosen": -2.3855233192443848, "logits/rejected": -2.342365026473999, "logps/chosen": -368.435791015625, "logps/rejected": -391.09893798828125, "loss": 0.5579, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7862086296081543, "rewards/margins": 0.42663902044296265, "rewards/rejected": -1.2128477096557617, "step": 1760 }, { "epoch": 1.85, "learning_rate": 1.6935481852727173e-07, "logits/chosen": -2.4512977600097656, "logits/rejected": -2.3995602130889893, "logps/chosen": -432.49578857421875, "logps/rejected": -437.99298095703125, "loss": 0.5795, "rewards/accuracies": 0.71875, "rewards/chosen": -0.7520878314971924, "rewards/margins": 0.4426315426826477, "rewards/rejected": -1.1947194337844849, "step": 1770 }, { "epoch": 1.86, "learning_rate": 1.6667529828813853e-07, "logits/chosen": -2.5046944618225098, "logits/rejected": -2.4515976905822754, "logps/chosen": -406.6270446777344, "logps/rejected": -427.93231201171875, "loss": 0.5629, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.6665788888931274, "rewards/margins": 0.5746535658836365, "rewards/rejected": -1.2412325143814087, "step": 1780 }, { "epoch": 1.87, "learning_rate": 1.640065036822605e-07, "logits/chosen": -2.5178215503692627, "logits/rejected": -2.4513187408447266, "logps/chosen": -407.05023193359375, "logps/rejected": -399.77423095703125, "loss": 0.5559, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.682750403881073, "rewards/margins": 0.4834769368171692, "rewards/rejected": -1.1662273406982422, "step": 1790 }, { "epoch": 1.88, "learning_rate": 1.6134877823936607e-07, "logits/chosen": -2.4752984046936035, "logits/rejected": -2.40830659866333, "logps/chosen": -424.7567443847656, "logps/rejected": -407.9845275878906, "loss": 0.5718, "rewards/accuracies": 0.78125, "rewards/chosen": -0.758663535118103, "rewards/margins": 0.4667127728462219, "rewards/rejected": -1.2253763675689697, "step": 1800 }, { "epoch": 1.88, "eval_logits/chosen": -2.4927799701690674, "eval_logits/rejected": -2.4185616970062256, "eval_logps/chosen": -424.36309814453125, "eval_logps/rejected": -427.0936584472656, "eval_loss": 0.5897929668426514, "eval_rewards/accuracies": 0.7321428656578064, "eval_rewards/chosen": -0.7931855320930481, "eval_rewards/margins": 0.4507039487361908, "eval_rewards/rejected": -1.2438894510269165, "eval_runtime": 244.3171, "eval_samples_per_second": 8.186, "eval_steps_per_second": 0.258, "step": 1800 }, { "epoch": 1.89, "learning_rate": 1.587024640643513e-07, "logits/chosen": -2.446739673614502, "logits/rejected": -2.4178757667541504, "logps/chosen": -401.63641357421875, "logps/rejected": -406.626708984375, "loss": 0.5706, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.7500771284103394, "rewards/margins": 0.4350431561470032, "rewards/rejected": -1.1851202249526978, "step": 1810 }, { "epoch": 1.9, "learning_rate": 1.5606790179324257e-07, "logits/chosen": -2.469186305999756, "logits/rejected": -2.3644776344299316, "logps/chosen": -437.4012756347656, "logps/rejected": -429.28369140625, "loss": 0.553, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.7953532338142395, "rewards/margins": 0.5376496315002441, "rewards/rejected": -1.3330028057098389, "step": 1820 }, { "epoch": 1.92, "learning_rate": 1.534454305493509e-07, "logits/chosen": -2.4878664016723633, "logits/rejected": -2.402182102203369, "logps/chosen": -430.43035888671875, "logps/rejected": -446.88055419921875, "loss": 0.5665, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.718338131904602, "rewards/margins": 0.5019052624702454, "rewards/rejected": -1.2202433347702026, "step": 1830 }, { "epoch": 1.93, "learning_rate": 1.5083538789961846e-07, "logits/chosen": -2.4689135551452637, "logits/rejected": -2.3651726245880127, "logps/chosen": -424.533935546875, "logps/rejected": -407.27264404296875, "loss": 0.5746, "rewards/accuracies": 0.71875, "rewards/chosen": -0.7878087162971497, "rewards/margins": 0.4385625720024109, "rewards/rejected": -1.2263712882995605, "step": 1840 }, { "epoch": 1.94, "learning_rate": 1.4823810981116767e-07, "logits/chosen": -2.420289993286133, "logits/rejected": -2.3955094814300537, "logps/chosen": -412.322509765625, "logps/rejected": -425.9512634277344, "loss": 0.5538, "rewards/accuracies": 0.75, "rewards/chosen": -0.6696097254753113, "rewards/margins": 0.4169555604457855, "rewards/rejected": -1.086565375328064, "step": 1850 }, { "epoch": 1.95, "learning_rate": 1.456539306080543e-07, "logits/chosen": -2.4510982036590576, "logits/rejected": -2.4162471294403076, "logps/chosen": -413.22509765625, "logps/rejected": -462.84381103515625, "loss": 0.5746, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.7580252289772034, "rewards/margins": 0.40020751953125, "rewards/rejected": -1.1582326889038086, "step": 1860 }, { "epoch": 1.96, "learning_rate": 1.4308318292823364e-07, "logits/chosen": -2.4641025066375732, "logits/rejected": -2.4155373573303223, "logps/chosen": -411.8502502441406, "logps/rejected": -425.70416259765625, "loss": 0.5509, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.7296292185783386, "rewards/margins": 0.44563156366348267, "rewards/rejected": -1.1752609014511108, "step": 1870 }, { "epoch": 1.97, "learning_rate": 1.4052619768074267e-07, "logits/chosen": -2.449817180633545, "logits/rejected": -2.390150547027588, "logps/chosen": -420.39849853515625, "logps/rejected": -426.16400146484375, "loss": 0.575, "rewards/accuracies": 0.75, "rewards/chosen": -0.7945643663406372, "rewards/margins": 0.48763760924339294, "rewards/rejected": -1.2822020053863525, "step": 1880 }, { "epoch": 1.98, "learning_rate": 1.3798330400310537e-07, "logits/chosen": -2.404205322265625, "logits/rejected": -2.3124032020568848, "logps/chosen": -383.17266845703125, "logps/rejected": -382.43841552734375, "loss": 0.5779, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6244784593582153, "rewards/margins": 0.4246695935726166, "rewards/rejected": -1.0491479635238647, "step": 1890 }, { "epoch": 1.99, "learning_rate": 1.354548292189657e-07, "logits/chosen": -2.437732458114624, "logits/rejected": -2.3991034030914307, "logps/chosen": -385.60443115234375, "logps/rejected": -417.61468505859375, "loss": 0.563, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.7070263624191284, "rewards/margins": 0.44580182433128357, "rewards/rejected": -1.1528282165527344, "step": 1900 }, { "epoch": 1.99, "eval_logits/chosen": -2.496089220046997, "eval_logits/rejected": -2.422304391860962, "eval_logps/chosen": -413.7807312011719, "eval_logps/rejected": -415.832763671875, "eval_loss": 0.5903951525688171, "eval_rewards/accuracies": 0.7202380895614624, "eval_rewards/chosen": -0.687361478805542, "eval_rewards/margins": 0.44391965866088867, "eval_rewards/rejected": -1.1312810182571411, "eval_runtime": 243.6107, "eval_samples_per_second": 8.21, "eval_steps_per_second": 0.259, "step": 1900 }, { "epoch": 2.0, "learning_rate": 1.3294109879595412e-07, "logits/chosen": -2.518566131591797, "logits/rejected": -2.4946486949920654, "logps/chosen": -407.2130126953125, "logps/rejected": -429.19573974609375, "loss": 0.5917, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.6605619192123413, "rewards/margins": 0.3588492274284363, "rewards/rejected": -1.0194110870361328, "step": 1910 }, { "epoch": 2.01, "learning_rate": 1.304424363037932e-07, "logits/chosen": -2.449763059616089, "logits/rejected": -2.358640193939209, "logps/chosen": -434.584716796875, "logps/rejected": -435.99725341796875, "loss": 0.5423, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7439482808113098, "rewards/margins": 0.45913758873939514, "rewards/rejected": -1.2030858993530273, "step": 1920 }, { "epoch": 2.02, "learning_rate": 1.2795916337264756e-07, "logits/chosen": -2.46913480758667, "logits/rejected": -2.377265214920044, "logps/chosen": -426.83660888671875, "logps/rejected": -421.1961364746094, "loss": 0.5715, "rewards/accuracies": 0.71875, "rewards/chosen": -0.7704228162765503, "rewards/margins": 0.47521066665649414, "rewards/rejected": -1.2456334829330444, "step": 1930 }, { "epoch": 2.03, "learning_rate": 1.2549159965172295e-07, "logits/chosen": -2.4469761848449707, "logits/rejected": -2.3427939414978027, "logps/chosen": -425.2955017089844, "logps/rejected": -424.61492919921875, "loss": 0.5667, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7805430293083191, "rewards/margins": 0.49475497007369995, "rewards/rejected": -1.275297999382019, "step": 1940 }, { "epoch": 2.04, "learning_rate": 1.2304006276812122e-07, "logits/chosen": -2.3801145553588867, "logits/rejected": -2.350468397140503, "logps/chosen": -366.3126220703125, "logps/rejected": -400.9440002441406, "loss": 0.5529, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.7851904630661011, "rewards/margins": 0.4816361963748932, "rewards/rejected": -1.2668267488479614, "step": 1950 }, { "epoch": 2.05, "learning_rate": 1.2060486828595442e-07, "logits/chosen": -2.4748592376708984, "logits/rejected": -2.408360719680786, "logps/chosen": -419.6524353027344, "logps/rejected": -431.26055908203125, "loss": 0.5753, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.8105200529098511, "rewards/margins": 0.42913907766342163, "rewards/rejected": -1.239659309387207, "step": 1960 }, { "epoch": 2.06, "learning_rate": 1.1818632966572578e-07, "logits/chosen": -2.4946236610412598, "logits/rejected": -2.4172983169555664, "logps/chosen": -416.42645263671875, "logps/rejected": -442.043212890625, "loss": 0.5641, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.8131068348884583, "rewards/margins": 0.5074289441108704, "rewards/rejected": -1.3205358982086182, "step": 1970 }, { "epoch": 2.07, "learning_rate": 1.1578475822398032e-07, "logits/chosen": -2.455920696258545, "logits/rejected": -2.3736257553100586, "logps/chosen": -422.4615783691406, "logps/rejected": -441.34521484375, "loss": 0.566, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.817348301410675, "rewards/margins": 0.49631649255752563, "rewards/rejected": -1.3136647939682007, "step": 1980 }, { "epoch": 2.08, "learning_rate": 1.1340046309323206e-07, "logits/chosen": -2.474325656890869, "logits/rejected": -2.4197046756744385, "logps/chosen": -406.9759826660156, "logps/rejected": -424.93023681640625, "loss": 0.5626, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.8615679740905762, "rewards/margins": 0.4161076545715332, "rewards/rejected": -1.2776756286621094, "step": 1990 }, { "epoch": 2.09, "learning_rate": 1.1103375118217218e-07, "logits/chosen": -2.4074063301086426, "logits/rejected": -2.339216709136963, "logps/chosen": -384.3484802246094, "logps/rejected": -401.8287048339844, "loss": 0.5633, "rewards/accuracies": 0.75, "rewards/chosen": -0.7149208784103394, "rewards/margins": 0.4786139130592346, "rewards/rejected": -1.1935349702835083, "step": 2000 }, { "epoch": 2.09, "eval_logits/chosen": -2.4819118976593018, "eval_logits/rejected": -2.407344341278076, "eval_logps/chosen": -420.6850891113281, "eval_logps/rejected": -423.75042724609375, "eval_loss": 0.5884086489677429, "eval_rewards/accuracies": 0.726190447807312, "eval_rewards/chosen": -0.756405770778656, "eval_rewards/margins": 0.45405152440071106, "eval_rewards/rejected": -1.2104572057724, "eval_runtime": 244.0013, "eval_samples_per_second": 8.197, "eval_steps_per_second": 0.258, "step": 2000 }, { "epoch": 2.1, "learning_rate": 1.086849271361634e-07, "logits/chosen": -2.4910244941711426, "logits/rejected": -2.4178977012634277, "logps/chosen": -421.26654052734375, "logps/rejected": -442.17657470703125, "loss": 0.5609, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7127526998519897, "rewards/margins": 0.517103374004364, "rewards/rejected": -1.2298561334609985, "step": 2010 }, { "epoch": 2.11, "learning_rate": 1.0635429329802578e-07, "logits/chosen": -2.428316831588745, "logits/rejected": -2.4025187492370605, "logps/chosen": -385.6629943847656, "logps/rejected": -423.72857666015625, "loss": 0.5433, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7322330474853516, "rewards/margins": 0.46380695700645447, "rewards/rejected": -1.1960399150848389, "step": 2020 }, { "epoch": 2.12, "learning_rate": 1.0404214966911895e-07, "logits/chosen": -2.4701590538024902, "logits/rejected": -2.3929755687713623, "logps/chosen": -432.38629150390625, "logps/rejected": -409.7430114746094, "loss": 0.5412, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7193957567214966, "rewards/margins": 0.4175872206687927, "rewards/rejected": -1.136983036994934, "step": 2030 }, { "epoch": 2.14, "learning_rate": 1.0174879387072549e-07, "logits/chosen": -2.4195656776428223, "logits/rejected": -2.3856568336486816, "logps/chosen": -378.31634521484375, "logps/rejected": -429.25396728515625, "loss": 0.5402, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.7908437848091125, "rewards/margins": 0.4326706528663635, "rewards/rejected": -1.2235145568847656, "step": 2040 }, { "epoch": 2.15, "learning_rate": 9.947452110574098e-08, "logits/chosen": -2.411748170852661, "logits/rejected": -2.3596882820129395, "logps/chosen": -426.4925842285156, "logps/rejected": -443.3196716308594, "loss": 0.5473, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.7338422536849976, "rewards/margins": 0.503951907157898, "rewards/rejected": -1.2377939224243164, "step": 2050 }, { "epoch": 2.16, "learning_rate": 9.721962412067519e-08, "logits/chosen": -2.401766061782837, "logits/rejected": -2.316685676574707, "logps/chosen": -408.12103271484375, "logps/rejected": -398.24017333984375, "loss": 0.5627, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.8616282343864441, "rewards/margins": 0.36459067463874817, "rewards/rejected": -1.2262189388275146, "step": 2060 }, { "epoch": 2.17, "learning_rate": 9.498439316796913e-08, "logits/chosen": -2.429500102996826, "logits/rejected": -2.341491937637329, "logps/chosen": -388.14208984375, "logps/rejected": -396.15423583984375, "loss": 0.551, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7477534413337708, "rewards/margins": 0.5198956727981567, "rewards/rejected": -1.2676490545272827, "step": 2070 }, { "epoch": 2.18, "learning_rate": 9.276911596863441e-08, "logits/chosen": -2.430539608001709, "logits/rejected": -2.3732991218566895, "logps/chosen": -408.09771728515625, "logps/rejected": -429.29705810546875, "loss": 0.5612, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.6935451626777649, "rewards/margins": 0.5118761658668518, "rewards/rejected": -1.2054214477539062, "step": 2080 }, { "epoch": 2.19, "learning_rate": 9.05740776752163e-08, "logits/chosen": -2.534486770629883, "logits/rejected": -2.425327777862549, "logps/chosen": -466.2666931152344, "logps/rejected": -450.0174255371094, "loss": 0.5509, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.763168215751648, "rewards/margins": 0.5992245674133301, "rewards/rejected": -1.362392783164978, "step": 2090 }, { "epoch": 2.2, "learning_rate": 8.839956083508959e-08, "logits/chosen": -2.4332027435302734, "logits/rejected": -2.4018168449401855, "logps/chosen": -428.01910400390625, "logps/rejected": -455.42889404296875, "loss": 0.5564, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.7803698182106018, "rewards/margins": 0.4613746106624603, "rewards/rejected": -1.2417443990707397, "step": 2100 }, { "epoch": 2.2, "eval_logits/chosen": -2.4696059226989746, "eval_logits/rejected": -2.394796133041382, "eval_logps/chosen": -426.54876708984375, "eval_logps/rejected": -430.72430419921875, "eval_loss": 0.587758481502533, "eval_rewards/accuracies": 0.726190447807312, "eval_rewards/chosen": -0.815041720867157, "eval_rewards/margins": 0.4651543200016022, "eval_rewards/rejected": -1.280196189880371, "eval_runtime": 249.102, "eval_samples_per_second": 8.029, "eval_steps_per_second": 0.253, "step": 2100 }, { "epoch": 2.21, "learning_rate": 8.624584535408836e-08, "logits/chosen": -2.467682361602783, "logits/rejected": -2.3958325386047363, "logps/chosen": -442.32879638671875, "logps/rejected": -444.9439392089844, "loss": 0.5491, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.7700357437133789, "rewards/margins": 0.5091968178749084, "rewards/rejected": -1.2792325019836426, "step": 2110 }, { "epoch": 2.22, "learning_rate": 8.411320846047637e-08, "logits/chosen": -2.4758048057556152, "logits/rejected": -2.452558755874634, "logps/chosen": -398.3210754394531, "logps/rejected": -397.4677734375, "loss": 0.5505, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.7616570591926575, "rewards/margins": 0.417407363653183, "rewards/rejected": -1.179064393043518, "step": 2120 }, { "epoch": 2.23, "learning_rate": 8.200192466926201e-08, "logits/chosen": -2.4519848823547363, "logits/rejected": -2.3826258182525635, "logps/chosen": -436.55859375, "logps/rejected": -458.65594482421875, "loss": 0.5332, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.8493996858596802, "rewards/margins": 0.5574203729629517, "rewards/rejected": -1.4068200588226318, "step": 2130 }, { "epoch": 2.24, "learning_rate": 7.991226574686241e-08, "logits/chosen": -2.4183828830718994, "logits/rejected": -2.3716251850128174, "logps/chosen": -374.54803466796875, "logps/rejected": -389.0216064453125, "loss": 0.5528, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7416827082633972, "rewards/margins": 0.5108539462089539, "rewards/rejected": -1.252536654472351, "step": 2140 }, { "epoch": 2.25, "learning_rate": 7.784450067612138e-08, "logits/chosen": -2.4434866905212402, "logits/rejected": -2.3613460063934326, "logps/chosen": -435.7193298339844, "logps/rejected": -428.2972717285156, "loss": 0.5349, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7578371167182922, "rewards/margins": 0.5514657497406006, "rewards/rejected": -1.3093029260635376, "step": 2150 }, { "epoch": 2.26, "learning_rate": 7.579889562168585e-08, "logits/chosen": -2.434720993041992, "logits/rejected": -2.359743595123291, "logps/chosen": -392.49005126953125, "logps/rejected": -418.412353515625, "loss": 0.532, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.7233065962791443, "rewards/margins": 0.6114121675491333, "rewards/rejected": -1.3347185850143433, "step": 2160 }, { "epoch": 2.27, "learning_rate": 7.377571389574474e-08, "logits/chosen": -2.4690604209899902, "logits/rejected": -2.412727117538452, "logps/chosen": -407.22943115234375, "logps/rejected": -429.3172302246094, "loss": 0.5137, "rewards/accuracies": 0.78125, "rewards/chosen": -0.8272277116775513, "rewards/margins": 0.5033684968948364, "rewards/rejected": -1.3305962085723877, "step": 2170 }, { "epoch": 2.28, "learning_rate": 7.177521592413505e-08, "logits/chosen": -2.4891464710235596, "logits/rejected": -2.4008851051330566, "logps/chosen": -416.02374267578125, "logps/rejected": -434.93853759765625, "loss": 0.5715, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.84009850025177, "rewards/margins": 0.4889064431190491, "rewards/rejected": -1.3290048837661743, "step": 2180 }, { "epoch": 2.29, "learning_rate": 6.979765921282021e-08, "logits/chosen": -2.4268863201141357, "logits/rejected": -2.3376641273498535, "logps/chosen": -441.4808044433594, "logps/rejected": -436.463134765625, "loss": 0.5379, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.8702206611633301, "rewards/margins": 0.5598399639129639, "rewards/rejected": -1.430060625076294, "step": 2190 }, { "epoch": 2.3, "learning_rate": 6.784329831474276e-08, "logits/chosen": -2.4219398498535156, "logits/rejected": -2.3306546211242676, "logps/chosen": -419.054443359375, "logps/rejected": -465.0423889160156, "loss": 0.5373, "rewards/accuracies": 0.78125, "rewards/chosen": -0.7490620613098145, "rewards/margins": 0.6311138272285461, "rewards/rejected": -1.3801758289337158, "step": 2200 }, { "epoch": 2.3, "eval_logits/chosen": -2.4548380374908447, "eval_logits/rejected": -2.379462718963623, "eval_logps/chosen": -432.9532470703125, "eval_logps/rejected": -438.72894287109375, "eval_loss": 0.5864917039871216, "eval_rewards/accuracies": 0.7341269850730896, "eval_rewards/chosen": -0.879087507724762, "eval_rewards/margins": 0.481155127286911, "eval_rewards/rejected": -1.3602426052093506, "eval_runtime": 246.5339, "eval_samples_per_second": 8.112, "eval_steps_per_second": 0.256, "step": 2200 }, { "epoch": 2.31, "learning_rate": 6.591238479705901e-08, "logits/chosen": -2.487351179122925, "logits/rejected": -2.405980348587036, "logps/chosen": -408.71954345703125, "logps/rejected": -426.19744873046875, "loss": 0.5408, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.7479840517044067, "rewards/margins": 0.6013373136520386, "rewards/rejected": -1.3493213653564453, "step": 2210 }, { "epoch": 2.32, "learning_rate": 6.40051672087562e-08, "logits/chosen": -2.4331510066986084, "logits/rejected": -2.3278965950012207, "logps/chosen": -438.2916564941406, "logps/rejected": -428.78485107421875, "loss": 0.5201, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.8200156092643738, "rewards/margins": 0.6146233677864075, "rewards/rejected": -1.4346389770507812, "step": 2220 }, { "epoch": 2.33, "learning_rate": 6.212189104865972e-08, "logits/chosen": -2.448960781097412, "logits/rejected": -2.387979030609131, "logps/chosen": -428.59039306640625, "logps/rejected": -447.049560546875, "loss": 0.5591, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.7488642930984497, "rewards/margins": 0.5563133955001831, "rewards/rejected": -1.3051776885986328, "step": 2230 }, { "epoch": 2.34, "learning_rate": 6.026279873383191e-08, "logits/chosen": -2.3341879844665527, "logits/rejected": -2.2485053539276123, "logps/chosen": -434.8133850097656, "logps/rejected": -454.1504821777344, "loss": 0.5177, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.8202449083328247, "rewards/margins": 0.660535454750061, "rewards/rejected": -1.4807803630828857, "step": 2240 }, { "epoch": 2.35, "learning_rate": 5.842812956836804e-08, "logits/chosen": -2.5089287757873535, "logits/rejected": -2.418996572494507, "logps/chosen": -466.69097900390625, "logps/rejected": -467.65948486328125, "loss": 0.5671, "rewards/accuracies": 0.75, "rewards/chosen": -0.8531349897384644, "rewards/margins": 0.5565911531448364, "rewards/rejected": -1.4097262620925903, "step": 2250 }, { "epoch": 2.37, "learning_rate": 5.661811971259284e-08, "logits/chosen": -2.4990134239196777, "logits/rejected": -2.427431106567383, "logps/chosen": -414.7474060058594, "logps/rejected": -434.27142333984375, "loss": 0.5575, "rewards/accuracies": 0.71875, "rewards/chosen": -0.7728979587554932, "rewards/margins": 0.4413650631904602, "rewards/rejected": -1.2142630815505981, "step": 2260 }, { "epoch": 2.38, "learning_rate": 5.483300215266168e-08, "logits/chosen": -2.392768621444702, "logits/rejected": -2.3579678535461426, "logps/chosen": -379.36358642578125, "logps/rejected": -460.744873046875, "loss": 0.5327, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.820690929889679, "rewards/margins": 0.571506679058075, "rewards/rejected": -1.392197608947754, "step": 2270 }, { "epoch": 2.39, "learning_rate": 5.307300667057049e-08, "logits/chosen": -2.4417078495025635, "logits/rejected": -2.350722551345825, "logps/chosen": -456.86138916015625, "logps/rejected": -441.4781188964844, "loss": 0.5599, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.7839853763580322, "rewards/margins": 0.4950861930847168, "rewards/rejected": -1.279071569442749, "step": 2280 }, { "epoch": 2.4, "learning_rate": 5.133835981457771e-08, "logits/chosen": -2.4283745288848877, "logits/rejected": -2.406078815460205, "logps/chosen": -382.7402038574219, "logps/rejected": -412.78192138671875, "loss": 0.5472, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.8303159475326538, "rewards/margins": 0.4645245671272278, "rewards/rejected": -1.2948405742645264, "step": 2290 }, { "epoch": 2.41, "learning_rate": 4.962928487004339e-08, "logits/chosen": -2.4081058502197266, "logits/rejected": -2.3385443687438965, "logps/chosen": -403.322509765625, "logps/rejected": -437.6986389160156, "loss": 0.5559, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7759246826171875, "rewards/margins": 0.6043592691421509, "rewards/rejected": -1.3802839517593384, "step": 2300 }, { "epoch": 2.41, "eval_logits/chosen": -2.4495673179626465, "eval_logits/rejected": -2.374314069747925, "eval_logps/chosen": -429.79962158203125, "eval_logps/rejected": -435.3000793457031, "eval_loss": 0.587175726890564, "eval_rewards/accuracies": 0.7242063283920288, "eval_rewards/chosen": -0.8475508689880371, "eval_rewards/margins": 0.478402704000473, "eval_rewards/rejected": -1.325953483581543, "eval_runtime": 245.2318, "eval_samples_per_second": 8.156, "eval_steps_per_second": 0.257, "step": 2300 }, { "epoch": 2.42, "learning_rate": 4.794600183068687e-08, "logits/chosen": -2.4469664096832275, "logits/rejected": -2.386204719543457, "logps/chosen": -409.12469482421875, "logps/rejected": -439.278564453125, "loss": 0.5463, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8211742639541626, "rewards/margins": 0.6064980626106262, "rewards/rejected": -1.4276723861694336, "step": 2310 }, { "epoch": 2.43, "learning_rate": 4.628872737026984e-08, "logits/chosen": -2.4036548137664795, "logits/rejected": -2.3470802307128906, "logps/chosen": -398.6167907714844, "logps/rejected": -412.97027587890625, "loss": 0.554, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.8146356344223022, "rewards/margins": 0.4236365258693695, "rewards/rejected": -1.2382723093032837, "step": 2320 }, { "epoch": 2.44, "learning_rate": 4.4657674814705085e-08, "logits/chosen": -2.4584813117980957, "logits/rejected": -2.3649630546569824, "logps/chosen": -418.7437438964844, "logps/rejected": -406.3216857910156, "loss": 0.5604, "rewards/accuracies": 0.78125, "rewards/chosen": -0.7241760492324829, "rewards/margins": 0.540256917476654, "rewards/rejected": -1.2644331455230713, "step": 2330 }, { "epoch": 2.45, "learning_rate": 4.305305411459773e-08, "logits/chosen": -2.4563419818878174, "logits/rejected": -2.410745859146118, "logps/chosen": -436.936767578125, "logps/rejected": -442.15948486328125, "loss": 0.5533, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.7409111261367798, "rewards/margins": 0.5352380275726318, "rewards/rejected": -1.276149034500122, "step": 2340 }, { "epoch": 2.46, "learning_rate": 4.1475071818219466e-08, "logits/chosen": -2.3814117908477783, "logits/rejected": -2.3320162296295166, "logps/chosen": -450.9549865722656, "logps/rejected": -440.48974609375, "loss": 0.5545, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.8471084833145142, "rewards/margins": 0.5237026214599609, "rewards/rejected": -1.3708112239837646, "step": 2350 }, { "epoch": 2.47, "learning_rate": 3.992393104492209e-08, "logits/chosen": -2.4187827110290527, "logits/rejected": -2.3232665061950684, "logps/chosen": -408.9256286621094, "logps/rejected": -428.8360290527344, "loss": 0.5629, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.8053148984909058, "rewards/margins": 0.47992628812789917, "rewards/rejected": -1.2852413654327393, "step": 2360 }, { "epoch": 2.48, "learning_rate": 3.839983145899148e-08, "logits/chosen": -2.399820566177368, "logits/rejected": -2.2919247150421143, "logps/chosen": -429.00469970703125, "logps/rejected": -428.13018798828125, "loss": 0.5486, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.7831125259399414, "rewards/margins": 0.573869526386261, "rewards/rejected": -1.3569821119308472, "step": 2370 }, { "epoch": 2.49, "learning_rate": 3.690296924394659e-08, "logits/chosen": -2.3557441234588623, "logits/rejected": -2.333583354949951, "logps/chosen": -409.0820007324219, "logps/rejected": -417.942138671875, "loss": 0.5611, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.9311076402664185, "rewards/margins": 0.3103558421134949, "rewards/rejected": -1.2414636611938477, "step": 2380 }, { "epoch": 2.5, "learning_rate": 3.543353707728672e-08, "logits/chosen": -2.426609992980957, "logits/rejected": -2.3464412689208984, "logps/chosen": -406.8370666503906, "logps/rejected": -408.04913330078125, "loss": 0.5695, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.8274558186531067, "rewards/margins": 0.5477991700172424, "rewards/rejected": -1.3752549886703491, "step": 2390 }, { "epoch": 2.51, "learning_rate": 3.3991724105689736e-08, "logits/chosen": -2.3941025733947754, "logits/rejected": -2.2982020378112793, "logps/chosen": -423.27313232421875, "logps/rejected": -424.16973876953125, "loss": 0.5467, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.898272693157196, "rewards/margins": 0.496847003698349, "rewards/rejected": -1.3951194286346436, "step": 2400 }, { "epoch": 2.51, "eval_logits/chosen": -2.4452052116394043, "eval_logits/rejected": -2.3696937561035156, "eval_logps/chosen": -429.87860107421875, "eval_logps/rejected": -435.4400939941406, "eval_loss": 0.5867913961410522, "eval_rewards/accuracies": 0.7222222089767456, "eval_rewards/chosen": -0.8483405113220215, "eval_rewards/margins": 0.4790137708187103, "eval_rewards/rejected": -1.3273543119430542, "eval_runtime": 247.943, "eval_samples_per_second": 8.066, "eval_steps_per_second": 0.254, "step": 2400 }, { "epoch": 2.52, "learning_rate": 3.257771592066499e-08, "logits/chosen": -2.428584098815918, "logits/rejected": -2.3538498878479004, "logps/chosen": -428.6109924316406, "logps/rejected": -435.99542236328125, "loss": 0.5562, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.8293555378913879, "rewards/margins": 0.58681321144104, "rewards/rejected": -1.4161686897277832, "step": 2410 }, { "epoch": 2.53, "learning_rate": 3.119169453466367e-08, "logits/chosen": -2.5020382404327393, "logits/rejected": -2.42659330368042, "logps/chosen": -431.7511291503906, "logps/rejected": -445.3756408691406, "loss": 0.5214, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.7522695660591125, "rewards/margins": 0.48350271582603455, "rewards/rejected": -1.2357723712921143, "step": 2420 }, { "epoch": 2.54, "learning_rate": 2.983383835765038e-08, "logits/chosen": -2.4027044773101807, "logits/rejected": -2.3710248470306396, "logps/chosen": -436.78973388671875, "logps/rejected": -459.3680725097656, "loss": 0.5583, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.7804380655288696, "rewards/margins": 0.5590247511863708, "rewards/rejected": -1.3394627571105957, "step": 2430 }, { "epoch": 2.55, "learning_rate": 2.8504322174137452e-08, "logits/chosen": -2.419508695602417, "logits/rejected": -2.393357992172241, "logps/chosen": -372.4573059082031, "logps/rejected": -410.5888671875, "loss": 0.5364, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.818364143371582, "rewards/margins": 0.5231537818908691, "rewards/rejected": -1.341517686843872, "step": 2440 }, { "epoch": 2.56, "learning_rate": 2.7203317120687214e-08, "logits/chosen": -2.36185359954834, "logits/rejected": -2.3087821006774902, "logps/chosen": -438.8936462402344, "logps/rejected": -449.9029846191406, "loss": 0.5202, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.8277127146720886, "rewards/margins": 0.5720094442367554, "rewards/rejected": -1.3997222185134888, "step": 2450 }, { "epoch": 2.57, "learning_rate": 2.5930990663882298e-08, "logits/chosen": -2.5004594326019287, "logits/rejected": -2.4132115840911865, "logps/chosen": -463.64031982421875, "logps/rejected": -466.9752502441406, "loss": 0.53, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8853015899658203, "rewards/margins": 0.4785071015357971, "rewards/rejected": -1.3638086318969727, "step": 2460 }, { "epoch": 2.59, "learning_rate": 2.4687506578770195e-08, "logits/chosen": -2.4215641021728516, "logits/rejected": -2.373194456100464, "logps/chosen": -429.7237854003906, "logps/rejected": -465.56622314453125, "loss": 0.5514, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7883203029632568, "rewards/margins": 0.5758455991744995, "rewards/rejected": -1.3641657829284668, "step": 2470 }, { "epoch": 2.6, "learning_rate": 2.3473024927780888e-08, "logits/chosen": -2.3653247356414795, "logits/rejected": -2.373579502105713, "logps/chosen": -422.0316467285156, "logps/rejected": -431.6985778808594, "loss": 0.5492, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.880569577217102, "rewards/margins": 0.3722376823425293, "rewards/rejected": -1.252807378768921, "step": 2480 }, { "epoch": 2.61, "learning_rate": 2.228770204012448e-08, "logits/chosen": -2.4217820167541504, "logits/rejected": -2.3459651470184326, "logps/chosen": -400.4313049316406, "logps/rejected": -409.916259765625, "loss": 0.5498, "rewards/accuracies": 0.71875, "rewards/chosen": -0.8538491129875183, "rewards/margins": 0.4800568222999573, "rewards/rejected": -1.3339059352874756, "step": 2490 }, { "epoch": 2.62, "learning_rate": 2.1131690491667547e-08, "logits/chosen": -2.431408405303955, "logits/rejected": -2.384169578552246, "logps/chosen": -432.4293518066406, "logps/rejected": -427.60626220703125, "loss": 0.5666, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.9473799467086792, "rewards/margins": 0.34295937418937683, "rewards/rejected": -1.2903392314910889, "step": 2500 }, { "epoch": 2.62, "eval_logits/chosen": -2.439899444580078, "eval_logits/rejected": -2.364093780517578, "eval_logps/chosen": -432.5810852050781, "eval_logps/rejected": -438.9631042480469, "eval_loss": 0.585797131061554, "eval_rewards/accuracies": 0.7242063283920288, "eval_rewards/chosen": -0.8753649592399597, "eval_rewards/margins": 0.4872189164161682, "eval_rewards/rejected": -1.3625837564468384, "eval_runtime": 250.9047, "eval_samples_per_second": 7.971, "eval_steps_per_second": 0.251, "step": 2500 }, { "epoch": 2.63, "learning_rate": 2.0005139085293942e-08, "logits/chosen": -2.3748772144317627, "logits/rejected": -2.366490364074707, "logps/chosen": -394.9242248535156, "logps/rejected": -445.06317138671875, "loss": 0.5697, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9380524754524231, "rewards/margins": 0.4403650164604187, "rewards/rejected": -1.3784174919128418, "step": 2510 }, { "epoch": 2.64, "learning_rate": 1.8908192831750545e-08, "logits/chosen": -2.3916258811950684, "logits/rejected": -2.3020219802856445, "logps/chosen": -426.46990966796875, "logps/rejected": -416.8695373535156, "loss": 0.5335, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8564615249633789, "rewards/margins": 0.5280572175979614, "rewards/rejected": -1.3845187425613403, "step": 2520 }, { "epoch": 2.65, "learning_rate": 1.7840992930981345e-08, "logits/chosen": -2.4265730381011963, "logits/rejected": -2.38875150680542, "logps/chosen": -462.3692932128906, "logps/rejected": -460.39337158203125, "loss": 0.5537, "rewards/accuracies": 0.71875, "rewards/chosen": -0.8171685338020325, "rewards/margins": 0.4775725305080414, "rewards/rejected": -1.294741153717041, "step": 2530 }, { "epoch": 2.66, "learning_rate": 1.6803676753952138e-08, "logits/chosen": -2.3773293495178223, "logits/rejected": -2.3178646564483643, "logps/chosen": -409.1978759765625, "logps/rejected": -452.56549072265625, "loss": 0.529, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.7988417744636536, "rewards/margins": 0.6674584150314331, "rewards/rejected": -1.466300129890442, "step": 2540 }, { "epoch": 2.67, "learning_rate": 1.5796377824967788e-08, "logits/chosen": -2.4757285118103027, "logits/rejected": -2.3862550258636475, "logps/chosen": -461.76483154296875, "logps/rejected": -448.6534118652344, "loss": 0.5449, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.7839967012405396, "rewards/margins": 0.5081378817558289, "rewards/rejected": -1.2921345233917236, "step": 2550 }, { "epoch": 2.68, "learning_rate": 1.481922580448533e-08, "logits/chosen": -2.402846574783325, "logits/rejected": -2.3772921562194824, "logps/chosen": -418.25079345703125, "logps/rejected": -474.2286682128906, "loss": 0.5587, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.7962145805358887, "rewards/margins": 0.4984118938446045, "rewards/rejected": -1.2946264743804932, "step": 2560 }, { "epoch": 2.69, "learning_rate": 1.3872346472423246e-08, "logits/chosen": -2.4501101970672607, "logits/rejected": -2.356166362762451, "logps/chosen": -448.30303955078125, "logps/rejected": -447.73712158203125, "loss": 0.5517, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.8609638214111328, "rewards/margins": 0.46454888582229614, "rewards/rejected": -1.3255127668380737, "step": 2570 }, { "epoch": 2.7, "learning_rate": 1.2955861711971745e-08, "logits/chosen": -2.4125468730926514, "logits/rejected": -2.312474012374878, "logps/chosen": -454.7066955566406, "logps/rejected": -423.25390625, "loss": 0.5568, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8763197064399719, "rewards/margins": 0.5553014874458313, "rewards/rejected": -1.4316211938858032, "step": 2580 }, { "epoch": 2.71, "learning_rate": 1.2069889493903112e-08, "logits/chosen": -2.40596342086792, "logits/rejected": -2.3436505794525146, "logps/chosen": -426.20660400390625, "logps/rejected": -443.60552978515625, "loss": 0.5476, "rewards/accuracies": 0.78125, "rewards/chosen": -0.8504959344863892, "rewards/margins": 0.5777041912078857, "rewards/rejected": -1.4282000064849854, "step": 2590 }, { "epoch": 2.72, "learning_rate": 1.1214543861387039e-08, "logits/chosen": -2.3759148120880127, "logits/rejected": -2.3197531700134277, "logps/chosen": -403.3725280761719, "logps/rejected": -445.80767822265625, "loss": 0.5113, "rewards/accuracies": 0.78125, "rewards/chosen": -0.731840193271637, "rewards/margins": 0.7046986818313599, "rewards/rejected": -1.4365389347076416, "step": 2600 }, { "epoch": 2.72, "eval_logits/chosen": -2.4361400604248047, "eval_logits/rejected": -2.3603618144989014, "eval_logps/chosen": -434.4620361328125, "eval_logps/rejected": -441.12109375, "eval_loss": 0.5855809450149536, "eval_rewards/accuracies": 0.7242063283920288, "eval_rewards/chosen": -0.894174337387085, "eval_rewards/margins": 0.4899892508983612, "eval_rewards/rejected": -1.384163737297058, "eval_runtime": 244.6014, "eval_samples_per_second": 8.177, "eval_steps_per_second": 0.258, "step": 2600 }, { "epoch": 2.73, "learning_rate": 1.0389934915310344e-08, "logits/chosen": -2.3394923210144043, "logits/rejected": -2.297569990158081, "logps/chosen": -414.2259826660156, "logps/rejected": -447.39239501953125, "loss": 0.5295, "rewards/accuracies": 0.71875, "rewards/chosen": -0.9291477203369141, "rewards/margins": 0.49646610021591187, "rewards/rejected": -1.4256137609481812, "step": 2610 }, { "epoch": 2.74, "learning_rate": 9.596168800105081e-09, "logits/chosen": -2.4024291038513184, "logits/rejected": -2.347120523452759, "logps/chosen": -436.79364013671875, "logps/rejected": -450.7452697753906, "loss": 0.5388, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8427282571792603, "rewards/margins": 0.5560713410377502, "rewards/rejected": -1.3987995386123657, "step": 2620 }, { "epoch": 2.75, "learning_rate": 8.833347690085258e-09, "logits/chosen": -2.4367868900299072, "logits/rejected": -2.39370059967041, "logps/chosen": -430.2493591308594, "logps/rejected": -459.7605895996094, "loss": 0.5325, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.816397488117218, "rewards/margins": 0.48605260252952576, "rewards/rejected": -1.3024499416351318, "step": 2630 }, { "epoch": 2.76, "learning_rate": 8.101569776295087e-09, "logits/chosen": -2.44547700881958, "logits/rejected": -2.3715062141418457, "logps/chosen": -451.3525390625, "logps/rejected": -472.49871826171875, "loss": 0.5547, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.8327264785766602, "rewards/margins": 0.5013109445571899, "rewards/rejected": -1.3340375423431396, "step": 2640 }, { "epoch": 2.77, "learning_rate": 7.400929253869537e-09, "logits/chosen": -2.386373519897461, "logits/rejected": -2.3548552989959717, "logps/chosen": -411.06036376953125, "logps/rejected": -401.76202392578125, "loss": 0.5401, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.7602349519729614, "rewards/margins": 0.544723391532898, "rewards/rejected": -1.3049582242965698, "step": 2650 }, { "epoch": 2.78, "learning_rate": 6.731516309909619e-09, "logits/chosen": -2.4245498180389404, "logits/rejected": -2.351013660430908, "logps/chosen": -413.696533203125, "logps/rejected": -431.78143310546875, "loss": 0.5556, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.8400930166244507, "rewards/margins": 0.5150105357170105, "rewards/rejected": -1.3551037311553955, "step": 2660 }, { "epoch": 2.79, "learning_rate": 6.093417111873306e-09, "logits/chosen": -2.405090808868408, "logits/rejected": -2.363133668899536, "logps/chosen": -446.4725036621094, "logps/rejected": -457.05291748046875, "loss": 0.5637, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.940416157245636, "rewards/margins": 0.46864986419677734, "rewards/rejected": -1.4090659618377686, "step": 2670 }, { "epoch": 2.8, "learning_rate": 5.486713796483966e-09, "logits/chosen": -2.4188191890716553, "logits/rejected": -2.4030518531799316, "logps/chosen": -426.7608947753906, "logps/rejected": -478.41424560546875, "loss": 0.5375, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9050353765487671, "rewards/margins": 0.4908295273780823, "rewards/rejected": -1.3958650827407837, "step": 2680 }, { "epoch": 2.82, "learning_rate": 4.911484459157844e-09, "logits/chosen": -2.355522632598877, "logits/rejected": -2.2627921104431152, "logps/chosen": -414.41058349609375, "logps/rejected": -408.3012390136719, "loss": 0.5264, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.8410286903381348, "rewards/margins": 0.5534576177597046, "rewards/rejected": -1.394486427307129, "step": 2690 }, { "epoch": 2.83, "learning_rate": 4.36780314395116e-09, "logits/chosen": -2.391401767730713, "logits/rejected": -2.301657199859619, "logps/chosen": -408.6781005859375, "logps/rejected": -399.43438720703125, "loss": 0.5601, "rewards/accuracies": 0.65625, "rewards/chosen": -0.82252436876297, "rewards/margins": 0.43992215394973755, "rewards/rejected": -1.262446403503418, "step": 2700 }, { "epoch": 2.83, "eval_logits/chosen": -2.434546947479248, "eval_logits/rejected": -2.35845685005188, "eval_logps/chosen": -435.4488525390625, "eval_logps/rejected": -442.29296875, "eval_loss": 0.5855222344398499, "eval_rewards/accuracies": 0.726190447807312, "eval_rewards/chosen": -0.904042661190033, "eval_rewards/margins": 0.49184030294418335, "eval_rewards/rejected": -1.3958829641342163, "eval_runtime": 247.9608, "eval_samples_per_second": 8.066, "eval_steps_per_second": 0.254, "step": 2700 }, { "epoch": 2.84, "learning_rate": 3.8557398340296195e-09, "logits/chosen": -2.4358608722686768, "logits/rejected": -2.317959785461426, "logps/chosen": -421.12969970703125, "logps/rejected": -426.5562438964844, "loss": 0.5643, "rewards/accuracies": 0.71875, "rewards/chosen": -0.9019654393196106, "rewards/margins": 0.5431965589523315, "rewards/rejected": -1.445162057876587, "step": 2710 }, { "epoch": 2.85, "learning_rate": 3.3753604426595417e-09, "logits/chosen": -2.3855860233306885, "logits/rejected": -2.311525344848633, "logps/chosen": -406.04010009765625, "logps/rejected": -413.10186767578125, "loss": 0.5756, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.8721002340316772, "rewards/margins": 0.516608715057373, "rewards/rejected": -1.3887090682983398, "step": 2720 }, { "epoch": 2.86, "learning_rate": 2.926726804723917e-09, "logits/chosen": -2.3775479793548584, "logits/rejected": -2.3673160076141357, "logps/chosen": -443.3018493652344, "logps/rejected": -457.483154296875, "loss": 0.5549, "rewards/accuracies": 0.75, "rewards/chosen": -0.8438417315483093, "rewards/margins": 0.5027114748954773, "rewards/rejected": -1.3465534448623657, "step": 2730 }, { "epoch": 2.87, "learning_rate": 2.5098966687626954e-09, "logits/chosen": -2.4318108558654785, "logits/rejected": -2.3373677730560303, "logps/chosen": -429.6363830566406, "logps/rejected": -441.6180725097656, "loss": 0.5237, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.8596477508544922, "rewards/margins": 0.6358083486557007, "rewards/rejected": -1.4954560995101929, "step": 2740 }, { "epoch": 2.88, "learning_rate": 2.124923689539426e-09, "logits/chosen": -2.4223837852478027, "logits/rejected": -2.356449604034424, "logps/chosen": -418.0809631347656, "logps/rejected": -436.1025390625, "loss": 0.5379, "rewards/accuracies": 0.75, "rewards/chosen": -0.8189032673835754, "rewards/margins": 0.5849324464797974, "rewards/rejected": -1.4038358926773071, "step": 2750 }, { "epoch": 2.89, "learning_rate": 1.7718574211347537e-09, "logits/chosen": -2.3926260471343994, "logits/rejected": -2.3217663764953613, "logps/chosen": -391.73358154296875, "logps/rejected": -397.6258239746094, "loss": 0.5441, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8713921308517456, "rewards/margins": 0.4508208632469177, "rewards/rejected": -1.3222129344940186, "step": 2760 }, { "epoch": 2.9, "learning_rate": 1.4507433105677703e-09, "logits/chosen": -2.361290454864502, "logits/rejected": -2.2786500453948975, "logps/chosen": -435.0477600097656, "logps/rejected": -461.3599548339844, "loss": 0.5338, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.8669681549072266, "rewards/margins": 0.5835943222045898, "rewards/rejected": -1.4505623579025269, "step": 2770 }, { "epoch": 2.91, "learning_rate": 1.1616226919460015e-09, "logits/chosen": -2.339124917984009, "logits/rejected": -2.259683132171631, "logps/chosen": -381.10076904296875, "logps/rejected": -404.74615478515625, "loss": 0.5604, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.9128230214118958, "rewards/margins": 0.4604857563972473, "rewards/rejected": -1.3733086585998535, "step": 2780 }, { "epoch": 2.92, "learning_rate": 9.045327811449676e-10, "logits/chosen": -2.357779026031494, "logits/rejected": -2.281944990158081, "logps/chosen": -403.49346923828125, "logps/rejected": -422.0860290527344, "loss": 0.5393, "rewards/accuracies": 0.75, "rewards/chosen": -0.8404678106307983, "rewards/margins": 0.6726306676864624, "rewards/rejected": -1.5130985975265503, "step": 2790 }, { "epoch": 2.93, "learning_rate": 6.795066710175157e-10, "logits/chosen": -2.414121150970459, "logits/rejected": -2.334519863128662, "logps/chosen": -422.6380920410156, "logps/rejected": -431.3646545410156, "loss": 0.5303, "rewards/accuracies": 0.78125, "rewards/chosen": -0.8720327615737915, "rewards/margins": 0.5439731478691101, "rewards/rejected": -1.4160058498382568, "step": 2800 }, { "epoch": 2.93, "eval_logits/chosen": -2.434152364730835, "eval_logits/rejected": -2.358067512512207, "eval_logps/chosen": -435.0786437988281, "eval_logps/rejected": -441.68048095703125, "eval_loss": 0.5856688618659973, "eval_rewards/accuracies": 0.7242063283920288, "eval_rewards/chosen": -0.9003406763076782, "eval_rewards/margins": 0.4894171357154846, "eval_rewards/rejected": -1.389757752418518, "eval_runtime": 249.9152, "eval_samples_per_second": 8.003, "eval_steps_per_second": 0.252, "step": 2800 }, { "epoch": 2.94, "learning_rate": 4.86573327134282e-10, "logits/chosen": -2.392698287963867, "logits/rejected": -2.3596174716949463, "logps/chosen": -470.7052307128906, "logps/rejected": -468.05133056640625, "loss": 0.5605, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.9614042043685913, "rewards/margins": 0.41344791650772095, "rewards/rejected": -1.374852180480957, "step": 2810 }, { "epoch": 2.95, "learning_rate": 3.2575758405506414e-10, "logits/chosen": -2.449122428894043, "logits/rejected": -2.3575618267059326, "logps/chosen": -462.04248046875, "logps/rejected": -493.18243408203125, "loss": 0.5312, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.8269790410995483, "rewards/margins": 0.6107988357543945, "rewards/rejected": -1.4377778768539429, "step": 2820 }, { "epoch": 2.96, "learning_rate": 1.9708014213221101e-10, "logits/chosen": -2.3595032691955566, "logits/rejected": -2.317110538482666, "logps/chosen": -446.1568298339844, "logps/rejected": -459.81341552734375, "loss": 0.5503, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.9512478709220886, "rewards/margins": 0.4871467649936676, "rewards/rejected": -1.4383947849273682, "step": 2830 }, { "epoch": 2.97, "learning_rate": 1.0055756484589339e-10, "logits/chosen": -2.350008249282837, "logits/rejected": -2.2946763038635254, "logps/chosen": -425.847412109375, "logps/rejected": -416.67535400390625, "loss": 0.5338, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7966060042381287, "rewards/margins": 0.5134121179580688, "rewards/rejected": -1.3100181818008423, "step": 2840 }, { "epoch": 2.98, "learning_rate": 3.620227667228137e-11, "logits/chosen": -2.44765043258667, "logits/rejected": -2.388314723968506, "logps/chosen": -457.08624267578125, "logps/rejected": -460.8662109375, "loss": 0.5446, "rewards/accuracies": 0.75, "rewards/chosen": -0.860503077507019, "rewards/margins": 0.4813441336154938, "rewards/rejected": -1.3418471813201904, "step": 2850 }, { "epoch": 2.99, "learning_rate": 4.022561484018361e-12, "logits/chosen": -2.4590046405792236, "logits/rejected": -2.3513126373291016, "logps/chosen": -433.288330078125, "logps/rejected": -440.690673828125, "loss": 0.5286, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.8339791297912598, "rewards/margins": 0.594098687171936, "rewards/rejected": -1.4280778169631958, "step": 2860 }, { "epoch": 3.0, "step": 2865, "total_flos": 0.0, "train_loss": 0.5928295368507478, "train_runtime": 47453.5759, "train_samples_per_second": 3.865, "train_steps_per_second": 0.06 } ], "logging_steps": 10, "max_steps": 2865, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }