diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,8066 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 100, + "global_step": 5733, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 8.710801393728223e-10, + "logits/chosen": 0.3170108497142792, + "logits/rejected": 0.35767874121665955, + "logps/chosen": -271.9781494140625, + "logps/rejected": -165.8260955810547, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 8.710801393728223e-09, + "logits/chosen": 0.34384429454803467, + "logits/rejected": 0.2845779359340668, + "logps/chosen": -362.0292053222656, + "logps/rejected": -280.8023376464844, + "loss": 0.693, + "rewards/accuracies": 0.5277777910232544, + "rewards/chosen": 0.0007958578644320369, + "rewards/margins": 0.0013568435097113252, + "rewards/rejected": -0.0005609856452792883, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 1.7421602787456446e-08, + "logits/chosen": 0.41451185941696167, + "logits/rejected": 0.33514469861984253, + "logps/chosen": -354.496337890625, + "logps/rejected": -268.55401611328125, + "loss": 0.6932, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.0009157696040347219, + "rewards/margins": 0.0009497471037320793, + "rewards/rejected": -3.397750697331503e-05, + "step": 20 + }, + { + "epoch": 0.02, + "learning_rate": 2.6132404181184667e-08, + "logits/chosen": 0.3903641700744629, + "logits/rejected": 0.32494959235191345, + "logps/chosen": -281.6260986328125, + "logps/rejected": -235.763427734375, + "loss": 0.6931, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0007924304227344692, + "rewards/margins": 9.540463361190632e-05, + "rewards/rejected": 0.0006970257963985205, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 3.484320557491289e-08, + "logits/chosen": 0.36575648188591003, + "logits/rejected": 0.4160943925380707, + "logps/chosen": -225.0307159423828, + "logps/rejected": -243.137451171875, + "loss": 0.6933, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.0002304415247635916, + "rewards/margins": -0.000740631076041609, + "rewards/rejected": 0.0005101895658299327, + "step": 40 + }, + { + "epoch": 0.03, + "learning_rate": 4.355400696864111e-08, + "logits/chosen": 0.37944719195365906, + "logits/rejected": 0.37457937002182007, + "logps/chosen": -354.24530029296875, + "logps/rejected": -278.2007141113281, + "loss": 0.693, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.0003198208869434893, + "rewards/margins": -0.0002704802027437836, + "rewards/rejected": -4.93407242174726e-05, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 5.2264808362369334e-08, + "logits/chosen": 0.2979043424129486, + "logits/rejected": 0.2968657910823822, + "logps/chosen": -308.49493408203125, + "logps/rejected": -271.253662109375, + "loss": 0.6933, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.0003536358126439154, + "rewards/margins": -0.0003051784005947411, + "rewards/rejected": -4.8457364755449817e-05, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 6.097560975609756e-08, + "logits/chosen": 0.3355613946914673, + "logits/rejected": 0.3047105073928833, + "logps/chosen": -308.9933166503906, + "logps/rejected": -264.9416198730469, + "loss": 0.6929, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.0006061201565898955, + "rewards/margins": -0.0004816774744540453, + "rewards/rejected": -0.00012444249296095222, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 6.968641114982578e-08, + "logits/chosen": 0.4044817090034485, + "logits/rejected": 0.3871970772743225, + "logps/chosen": -311.4313049316406, + "logps/rejected": -269.52813720703125, + "loss": 0.6929, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.000310670118778944, + "rewards/margins": 0.00034131400752812624, + "rewards/rejected": -3.064396514673717e-05, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 7.8397212543554e-08, + "logits/chosen": 0.33987581729888916, + "logits/rejected": 0.37897253036499023, + "logps/chosen": -255.73159790039062, + "logps/rejected": -286.35845947265625, + "loss": 0.6929, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": -9.91291381069459e-05, + "rewards/margins": 3.401337380637415e-05, + "rewards/rejected": -0.00013314261741470546, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 8.710801393728223e-08, + "logits/chosen": 0.32025259733200073, + "logits/rejected": 0.31265729665756226, + "logps/chosen": -308.8668518066406, + "logps/rejected": -258.37359619140625, + "loss": 0.6932, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": 0.0013080101925879717, + "rewards/margins": 0.0005450951284728944, + "rewards/rejected": 0.0007629150059074163, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 9.581881533101045e-08, + "logits/chosen": 0.3391476273536682, + "logits/rejected": 0.3699817955493927, + "logps/chosen": -294.4396057128906, + "logps/rejected": -265.50604248046875, + "loss": 0.6932, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.0004913022858090699, + "rewards/margins": -0.0006068542716093361, + "rewards/rejected": 0.00011555196397239342, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 1.0452961672473867e-07, + "logits/chosen": 0.2914244830608368, + "logits/rejected": 0.2608393430709839, + "logps/chosen": -322.7389831542969, + "logps/rejected": -296.3192138671875, + "loss": 0.6931, + "rewards/accuracies": 0.3499999940395355, + "rewards/chosen": -0.0007188282324932516, + "rewards/margins": -0.000560021901037544, + "rewards/rejected": -0.00015880633145570755, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 1.132404181184669e-07, + "logits/chosen": 0.2627529799938202, + "logits/rejected": 0.21750828623771667, + "logps/chosen": -391.84979248046875, + "logps/rejected": -304.40533447265625, + "loss": 0.6929, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 5.764817979070358e-05, + "rewards/margins": 0.0006713207112625241, + "rewards/rejected": -0.0006136724841780961, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 1.219512195121951e-07, + "logits/chosen": 0.3835189938545227, + "logits/rejected": 0.2935212552547455, + "logps/chosen": -296.1155700683594, + "logps/rejected": -293.04443359375, + "loss": 0.693, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.0009465150651521981, + "rewards/margins": 0.0007651118794456124, + "rewards/rejected": 0.0001814032148104161, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 1.3066202090592334e-07, + "logits/chosen": 0.3177749514579773, + "logits/rejected": 0.2864537835121155, + "logps/chosen": -346.4631652832031, + "logps/rejected": -260.0641784667969, + "loss": 0.6928, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0005447001312859356, + "rewards/margins": -0.00026119090034626424, + "rewards/rejected": -0.00028350926004350185, + "step": 150 + }, + { + "epoch": 0.08, + "learning_rate": 1.3937282229965157e-07, + "logits/chosen": 0.32583457231521606, + "logits/rejected": 0.27274638414382935, + "logps/chosen": -319.1077575683594, + "logps/rejected": -266.54486083984375, + "loss": 0.693, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": -0.0006812402862124145, + "rewards/margins": -0.0006552303093485534, + "rewards/rejected": -2.6010035071521997e-05, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 1.480836236933798e-07, + "logits/chosen": 0.2908182144165039, + "logits/rejected": 0.27819719910621643, + "logps/chosen": -338.5760498046875, + "logps/rejected": -307.96539306640625, + "loss": 0.6927, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -1.5355797586380504e-05, + "rewards/margins": -0.0009833310032263398, + "rewards/rejected": 0.0009679750655777752, + "step": 170 + }, + { + "epoch": 0.09, + "learning_rate": 1.56794425087108e-07, + "logits/chosen": 0.44243597984313965, + "logits/rejected": 0.3705524206161499, + "logps/chosen": -322.1753234863281, + "logps/rejected": -295.217529296875, + "loss": 0.6928, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.0010572379687801003, + "rewards/margins": 0.0020315528381615877, + "rewards/rejected": -0.000974315102212131, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 1.6550522648083622e-07, + "logits/chosen": 0.47350215911865234, + "logits/rejected": 0.40756964683532715, + "logps/chosen": -276.59906005859375, + "logps/rejected": -268.8476867675781, + "loss": 0.6927, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": 0.0002765903191175312, + "rewards/margins": 0.00016240756667684764, + "rewards/rejected": 0.00011418270878493786, + "step": 190 + }, + { + "epoch": 0.1, + "learning_rate": 1.7421602787456445e-07, + "logits/chosen": 0.3433852195739746, + "logits/rejected": 0.2721015214920044, + "logps/chosen": -349.0690002441406, + "logps/rejected": -322.2076416015625, + "loss": 0.6927, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.002144585596397519, + "rewards/margins": 0.0025809439830482006, + "rewards/rejected": -0.0004363584448583424, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 1.8292682926829268e-07, + "logits/chosen": 0.3642790913581848, + "logits/rejected": 0.4311489164829254, + "logps/chosen": -271.95513916015625, + "logps/rejected": -248.6610870361328, + "loss": 0.6929, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.00040192221058532596, + "rewards/margins": -0.000289176736259833, + "rewards/rejected": -0.00011274554708506912, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 1.916376306620209e-07, + "logits/chosen": 0.3174039125442505, + "logits/rejected": 0.36144882440567017, + "logps/chosen": -295.30340576171875, + "logps/rejected": -248.4581756591797, + "loss": 0.6928, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.00016610624152235687, + "rewards/margins": 0.0011444597039371729, + "rewards/rejected": -0.0009783534333109856, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 2.003484320557491e-07, + "logits/chosen": 0.33461135625839233, + "logits/rejected": 0.2857970595359802, + "logps/chosen": -317.2145690917969, + "logps/rejected": -250.4669952392578, + "loss": 0.6928, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.0003413598460610956, + "rewards/margins": 0.0002269690448883921, + "rewards/rejected": 0.00011439078662078828, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 2.0905923344947734e-07, + "logits/chosen": 0.3559264540672302, + "logits/rejected": 0.3971884846687317, + "logps/chosen": -249.724365234375, + "logps/rejected": -211.0904541015625, + "loss": 0.6926, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.0007045454694889486, + "rewards/margins": 0.0008303613285534084, + "rewards/rejected": -0.00012581582996062934, + "step": 240 + }, + { + "epoch": 0.13, + "learning_rate": 2.1777003484320556e-07, + "logits/chosen": 0.2798416018486023, + "logits/rejected": 0.30008456110954285, + "logps/chosen": -309.72613525390625, + "logps/rejected": -267.5157470703125, + "loss": 0.693, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.0006688380381092429, + "rewards/margins": 0.0011650488013401628, + "rewards/rejected": -0.0004962105304002762, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 2.264808362369338e-07, + "logits/chosen": 0.32978278398513794, + "logits/rejected": 0.3426227867603302, + "logps/chosen": -377.54766845703125, + "logps/rejected": -277.21502685546875, + "loss": 0.6925, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.001659739762544632, + "rewards/margins": 0.0014490484027191997, + "rewards/rejected": 0.00021069117065053433, + "step": 260 + }, + { + "epoch": 0.14, + "learning_rate": 2.3519163763066202e-07, + "logits/chosen": 0.2989436984062195, + "logits/rejected": 0.3548172414302826, + "logps/chosen": -225.9089813232422, + "logps/rejected": -210.8122100830078, + "loss": 0.6925, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.00030976219568401575, + "rewards/margins": 0.00035605434095487, + "rewards/rejected": -4.629206523532048e-05, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 2.439024390243902e-07, + "logits/chosen": 0.30432480573654175, + "logits/rejected": 0.3280238211154938, + "logps/chosen": -294.1086730957031, + "logps/rejected": -242.9200897216797, + "loss": 0.6928, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.0018314539920538664, + "rewards/margins": 0.0012845260789617896, + "rewards/rejected": 0.0005469276802614331, + "step": 280 + }, + { + "epoch": 0.15, + "learning_rate": 2.526132404181184e-07, + "logits/chosen": 0.3513438105583191, + "logits/rejected": 0.3013264238834381, + "logps/chosen": -258.6016540527344, + "logps/rejected": -270.3151550292969, + "loss": 0.6927, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.0009280891390517354, + "rewards/margins": 0.0018027920741587877, + "rewards/rejected": -0.0008747029933147132, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 2.613240418118467e-07, + "logits/chosen": 0.361990362405777, + "logits/rejected": 0.36707669496536255, + "logps/chosen": -347.64947509765625, + "logps/rejected": -320.00689697265625, + "loss": 0.6924, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": 0.0015204499941319227, + "rewards/margins": 0.0014906801516190171, + "rewards/rejected": 2.9769911634502932e-05, + "step": 300 + }, + { + "epoch": 0.16, + "learning_rate": 2.700348432055749e-07, + "logits/chosen": 0.3724101483821869, + "logits/rejected": 0.3676129877567291, + "logps/chosen": -317.96209716796875, + "logps/rejected": -273.7264709472656, + "loss": 0.6922, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.0012596590677276254, + "rewards/margins": 0.0006816141540184617, + "rewards/rejected": 0.0005780447972938418, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 2.7874564459930313e-07, + "logits/chosen": 0.34939244389533997, + "logits/rejected": 0.35409045219421387, + "logps/chosen": -308.7851867675781, + "logps/rejected": -339.3348083496094, + "loss": 0.6926, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.002038495149463415, + "rewards/margins": 0.0028217299841344357, + "rewards/rejected": -0.0007832351257093251, + "step": 320 + }, + { + "epoch": 0.17, + "learning_rate": 2.874564459930314e-07, + "logits/chosen": 0.4071926176548004, + "logits/rejected": 0.3380196690559387, + "logps/chosen": -287.54901123046875, + "logps/rejected": -277.5677795410156, + "loss": 0.6924, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.0005766181275248528, + "rewards/margins": -0.00022260425612330437, + "rewards/rejected": 0.0007992222672328353, + "step": 330 + }, + { + "epoch": 0.18, + "learning_rate": 2.961672473867596e-07, + "logits/chosen": 0.43199872970581055, + "logits/rejected": 0.33364588022232056, + "logps/chosen": -308.38134765625, + "logps/rejected": -210.3172149658203, + "loss": 0.6922, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0019256987143307924, + "rewards/margins": 0.0027559720911085606, + "rewards/rejected": -0.00083027349319309, + "step": 340 + }, + { + "epoch": 0.18, + "learning_rate": 3.048780487804878e-07, + "logits/chosen": 0.3490511476993561, + "logits/rejected": 0.39305615425109863, + "logps/chosen": -244.5614013671875, + "logps/rejected": -290.2716369628906, + "loss": 0.6921, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0005816185148432851, + "rewards/margins": 0.00035765691427513957, + "rewards/rejected": 0.00022396161512006074, + "step": 350 + }, + { + "epoch": 0.19, + "learning_rate": 3.13588850174216e-07, + "logits/chosen": 0.4072600305080414, + "logits/rejected": 0.45239806175231934, + "logps/chosen": -251.4489288330078, + "logps/rejected": -262.99395751953125, + "loss": 0.6924, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": 0.00011031948088202626, + "rewards/margins": 0.0024239744525402784, + "rewards/rejected": -0.0023136548697948456, + "step": 360 + }, + { + "epoch": 0.19, + "learning_rate": 3.2229965156794425e-07, + "logits/chosen": 0.33437368273735046, + "logits/rejected": 0.2984531819820404, + "logps/chosen": -348.13482666015625, + "logps/rejected": -295.0039978027344, + "loss": 0.6921, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.0037342619616538286, + "rewards/margins": 0.004230900201946497, + "rewards/rejected": -0.000496637774631381, + "step": 370 + }, + { + "epoch": 0.2, + "learning_rate": 3.3101045296167245e-07, + "logits/chosen": 0.4131089746952057, + "logits/rejected": 0.3828733563423157, + "logps/chosen": -255.59707641601562, + "logps/rejected": -194.83799743652344, + "loss": 0.6919, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0008168669301085174, + "rewards/margins": 0.002202791627496481, + "rewards/rejected": -0.0013859247555956244, + "step": 380 + }, + { + "epoch": 0.2, + "learning_rate": 3.3972125435540065e-07, + "logits/chosen": 0.31555676460266113, + "logits/rejected": 0.36338990926742554, + "logps/chosen": -309.2490234375, + "logps/rejected": -355.1193542480469, + "loss": 0.692, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.0020399349741637707, + "rewards/margins": 0.0011781295761466026, + "rewards/rejected": 0.0008618049323558807, + "step": 390 + }, + { + "epoch": 0.21, + "learning_rate": 3.484320557491289e-07, + "logits/chosen": 0.3376355767250061, + "logits/rejected": 0.35191774368286133, + "logps/chosen": -291.76214599609375, + "logps/rejected": -286.9822692871094, + "loss": 0.692, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.0032814189326018095, + "rewards/margins": 0.001435564481653273, + "rewards/rejected": 0.0018458545673638582, + "step": 400 + }, + { + "epoch": 0.21, + "learning_rate": 3.5714285714285716e-07, + "logits/chosen": 0.2917638421058655, + "logits/rejected": 0.22457854449748993, + "logps/chosen": -359.77838134765625, + "logps/rejected": -265.8029479980469, + "loss": 0.6917, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.0006081667961552739, + "rewards/margins": 0.0009180738707073033, + "rewards/rejected": -0.0003099074529018253, + "step": 410 + }, + { + "epoch": 0.22, + "learning_rate": 3.6585365853658536e-07, + "logits/chosen": 0.35560792684555054, + "logits/rejected": 0.28562021255493164, + "logps/chosen": -377.1800842285156, + "logps/rejected": -215.2724609375, + "loss": 0.6914, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.0032722880132496357, + "rewards/margins": 0.005252937786281109, + "rewards/rejected": -0.0019806502386927605, + "step": 420 + }, + { + "epoch": 0.23, + "learning_rate": 3.7456445993031356e-07, + "logits/chosen": 0.3571794629096985, + "logits/rejected": 0.2926293611526489, + "logps/chosen": -344.73358154296875, + "logps/rejected": -269.90374755859375, + "loss": 0.6916, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.003971777856349945, + "rewards/margins": 0.0038075477350503206, + "rewards/rejected": 0.0001642298884689808, + "step": 430 + }, + { + "epoch": 0.23, + "learning_rate": 3.832752613240418e-07, + "logits/chosen": 0.33091968297958374, + "logits/rejected": 0.33137187361717224, + "logps/chosen": -269.0303955078125, + "logps/rejected": -273.2547302246094, + "loss": 0.6916, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.002539347391575575, + "rewards/margins": 0.0036257575266063213, + "rewards/rejected": -0.001086410484276712, + "step": 440 + }, + { + "epoch": 0.24, + "learning_rate": 3.9198606271777e-07, + "logits/chosen": 0.34520870447158813, + "logits/rejected": 0.3429456353187561, + "logps/chosen": -292.42303466796875, + "logps/rejected": -263.06427001953125, + "loss": 0.6914, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.0004831695114262402, + "rewards/margins": 0.002628723159432411, + "rewards/rejected": -0.0021455539390444756, + "step": 450 + }, + { + "epoch": 0.24, + "learning_rate": 4.006968641114982e-07, + "logits/chosen": 0.41168397665023804, + "logits/rejected": 0.33299416303634644, + "logps/chosen": -328.90643310546875, + "logps/rejected": -249.24636840820312, + "loss": 0.6909, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.0023158483672887087, + "rewards/margins": 0.0044514830224215984, + "rewards/rejected": -0.0021356348879635334, + "step": 460 + }, + { + "epoch": 0.25, + "learning_rate": 4.0940766550522647e-07, + "logits/chosen": 0.3450584411621094, + "logits/rejected": 0.3537040650844574, + "logps/chosen": -277.84783935546875, + "logps/rejected": -229.71878051757812, + "loss": 0.6912, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.0038313809782266617, + "rewards/margins": 0.005424472503364086, + "rewards/rejected": -0.001593091175891459, + "step": 470 + }, + { + "epoch": 0.25, + "learning_rate": 4.1811846689895467e-07, + "logits/chosen": 0.41893109679222107, + "logits/rejected": 0.3723496198654175, + "logps/chosen": -303.4540100097656, + "logps/rejected": -216.4768524169922, + "loss": 0.6914, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.004440109245479107, + "rewards/margins": 0.0051439059898257256, + "rewards/rejected": -0.0007037969189696014, + "step": 480 + }, + { + "epoch": 0.26, + "learning_rate": 4.268292682926829e-07, + "logits/chosen": 0.24236135184764862, + "logits/rejected": 0.28322139382362366, + "logps/chosen": -281.379638671875, + "logps/rejected": -330.65777587890625, + "loss": 0.6915, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.0020877670031040907, + "rewards/margins": 0.004719098098576069, + "rewards/rejected": -0.002631331095471978, + "step": 490 + }, + { + "epoch": 0.26, + "learning_rate": 4.3554006968641113e-07, + "logits/chosen": 0.4597233235836029, + "logits/rejected": 0.35320740938186646, + "logps/chosen": -331.86407470703125, + "logps/rejected": -262.0279541015625, + "loss": 0.6916, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.006418133620172739, + "rewards/margins": 0.005038060247898102, + "rewards/rejected": 0.0013800726737827063, + "step": 500 + }, + { + "epoch": 0.27, + "learning_rate": 4.442508710801394e-07, + "logits/chosen": 0.2914946973323822, + "logits/rejected": 0.24324540793895721, + "logps/chosen": -312.9110107421875, + "logps/rejected": -339.0050048828125, + "loss": 0.6911, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.003171132178977132, + "rewards/margins": 0.0027220987249165773, + "rewards/rejected": 0.0004490331339184195, + "step": 510 + }, + { + "epoch": 0.27, + "learning_rate": 4.529616724738676e-07, + "logits/chosen": 0.3802244961261749, + "logits/rejected": 0.3113190531730652, + "logps/chosen": -324.1048889160156, + "logps/rejected": -262.59661865234375, + "loss": 0.6912, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.004197067115455866, + "rewards/margins": 0.006333489902317524, + "rewards/rejected": -0.002136422088369727, + "step": 520 + }, + { + "epoch": 0.28, + "learning_rate": 4.616724738675958e-07, + "logits/chosen": 0.336688756942749, + "logits/rejected": 0.32058995962142944, + "logps/chosen": -253.60986328125, + "logps/rejected": -264.2114562988281, + "loss": 0.6907, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.002829137723892927, + "rewards/margins": 0.005678371526300907, + "rewards/rejected": -0.0028492335695773363, + "step": 530 + }, + { + "epoch": 0.28, + "learning_rate": 4.7038327526132404e-07, + "logits/chosen": 0.4176596701145172, + "logits/rejected": 0.3263845443725586, + "logps/chosen": -376.275146484375, + "logps/rejected": -259.18896484375, + "loss": 0.6911, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.0038488004356622696, + "rewards/margins": 0.0056008342653512955, + "rewards/rejected": -0.0017520335968583822, + "step": 540 + }, + { + "epoch": 0.29, + "learning_rate": 4.790940766550523e-07, + "logits/chosen": 0.3719063103199005, + "logits/rejected": 0.37896281480789185, + "logps/chosen": -302.3629455566406, + "logps/rejected": -264.2347106933594, + "loss": 0.6911, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.0020447494462132454, + "rewards/margins": 0.004400859586894512, + "rewards/rejected": -0.0023561103735119104, + "step": 550 + }, + { + "epoch": 0.29, + "learning_rate": 4.878048780487804e-07, + "logits/chosen": 0.36305055022239685, + "logits/rejected": 0.24923209846019745, + "logps/chosen": -411.765869140625, + "logps/rejected": -349.3866271972656, + "loss": 0.6903, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.009467719122767448, + "rewards/margins": 0.007618948817253113, + "rewards/rejected": 0.001848770072683692, + "step": 560 + }, + { + "epoch": 0.3, + "learning_rate": 4.965156794425087e-07, + "logits/chosen": 0.3964191675186157, + "logits/rejected": 0.35975709557533264, + "logps/chosen": -298.0080261230469, + "logps/rejected": -267.7530517578125, + "loss": 0.6906, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0032093741465359926, + "rewards/margins": 0.0026827100664377213, + "rewards/rejected": 0.0005266641965135932, + "step": 570 + }, + { + "epoch": 0.3, + "learning_rate": 4.999983312905696e-07, + "logits/chosen": 0.36271387338638306, + "logits/rejected": 0.27819180488586426, + "logps/chosen": -289.54852294921875, + "logps/rejected": -221.93734741210938, + "loss": 0.6898, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.003947210498154163, + "rewards/margins": 0.005013973917812109, + "rewards/rejected": -0.0010667633032426238, + "step": 580 + }, + { + "epoch": 0.31, + "learning_rate": 4.999881337025014e-07, + "logits/chosen": 0.26377126574516296, + "logits/rejected": 0.2488066852092743, + "logps/chosen": -268.46112060546875, + "logps/rejected": -226.2292022705078, + "loss": 0.6908, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.0002535443636588752, + "rewards/margins": 0.001007495797239244, + "rewards/rejected": -0.0007539513753727078, + "step": 590 + }, + { + "epoch": 0.31, + "learning_rate": 4.999686659648518e-07, + "logits/chosen": 0.3953297436237335, + "logits/rejected": 0.35871651768684387, + "logps/chosen": -231.27572631835938, + "logps/rejected": -255.6056671142578, + "loss": 0.6894, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.003411300480365753, + "rewards/margins": 0.004523693583905697, + "rewards/rejected": -0.0011123933363705873, + "step": 600 + }, + { + "epoch": 0.32, + "learning_rate": 4.999399287995302e-07, + "logits/chosen": 0.37459492683410645, + "logits/rejected": 0.46103644371032715, + "logps/chosen": -246.6116180419922, + "logps/rejected": -293.78448486328125, + "loss": 0.6894, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.00318206031806767, + "rewards/margins": 0.006666205823421478, + "rewards/rejected": -0.003484147135168314, + "step": 610 + }, + { + "epoch": 0.32, + "learning_rate": 4.999019232721791e-07, + "logits/chosen": 0.33487755060195923, + "logits/rejected": 0.3410380482673645, + "logps/chosen": -358.2688903808594, + "logps/rejected": -283.22314453125, + "loss": 0.6895, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.006958150304853916, + "rewards/margins": 0.009823032654821873, + "rewards/rejected": -0.002864880720153451, + "step": 620 + }, + { + "epoch": 0.33, + "learning_rate": 4.998546507921325e-07, + "logits/chosen": 0.33910471200942993, + "logits/rejected": 0.3406114876270294, + "logps/chosen": -256.4313049316406, + "logps/rejected": -276.10064697265625, + "loss": 0.6891, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.000572996330447495, + "rewards/margins": 0.002004786394536495, + "rewards/rejected": -0.0014317900640890002, + "step": 630 + }, + { + "epoch": 0.33, + "learning_rate": 4.997981131123656e-07, + "logits/chosen": 0.30870598554611206, + "logits/rejected": 0.27435797452926636, + "logps/chosen": -383.6075439453125, + "logps/rejected": -331.48614501953125, + "loss": 0.6886, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.009466699324548244, + "rewards/margins": 0.011064816266298294, + "rewards/rejected": -0.0015981157775968313, + "step": 640 + }, + { + "epoch": 0.34, + "learning_rate": 4.997323123294291e-07, + "logits/chosen": 0.35772138833999634, + "logits/rejected": 0.34295064210891724, + "logps/chosen": -316.08355712890625, + "logps/rejected": -233.4802703857422, + "loss": 0.6904, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.005766328424215317, + "rewards/margins": 0.008546034805476665, + "rewards/rejected": -0.0027797059156000614, + "step": 650 + }, + { + "epoch": 0.35, + "learning_rate": 4.99657250883371e-07, + "logits/chosen": 0.3746064305305481, + "logits/rejected": 0.3391752243041992, + "logps/chosen": -264.4750061035156, + "logps/rejected": -204.13467407226562, + "loss": 0.6897, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.003877130104228854, + "rewards/margins": 0.005806891247630119, + "rewards/rejected": -0.0019297614926472306, + "step": 660 + }, + { + "epoch": 0.35, + "learning_rate": 4.995729315576468e-07, + "logits/chosen": 0.40420597791671753, + "logits/rejected": 0.33833855390548706, + "logps/chosen": -276.43310546875, + "logps/rejected": -219.8043212890625, + "loss": 0.6894, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.006816673092544079, + "rewards/margins": 0.008805202320218086, + "rewards/rejected": -0.0019885296933352947, + "step": 670 + }, + { + "epoch": 0.36, + "learning_rate": 4.99479357479016e-07, + "logits/chosen": 0.22999343276023865, + "logits/rejected": 0.3004499673843384, + "logps/chosen": -210.9781951904297, + "logps/rejected": -218.9191131591797, + "loss": 0.6902, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0006085085915401578, + "rewards/margins": 0.002932955976575613, + "rewards/rejected": -0.0035414646845310926, + "step": 680 + }, + { + "epoch": 0.36, + "learning_rate": 4.993765321174261e-07, + "logits/chosen": 0.36885711550712585, + "logits/rejected": 0.33634617924690247, + "logps/chosen": -306.89495849609375, + "logps/rejected": -315.4165954589844, + "loss": 0.6891, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.0054230643436312675, + "rewards/margins": 0.010317849926650524, + "rewards/rejected": -0.004894785117357969, + "step": 690 + }, + { + "epoch": 0.37, + "learning_rate": 4.992644592858842e-07, + "logits/chosen": 0.4698862135410309, + "logits/rejected": 0.46879833936691284, + "logps/chosen": -224.7325439453125, + "logps/rejected": -254.5215606689453, + "loss": 0.6889, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.004678064025938511, + "rewards/margins": 0.008384588174521923, + "rewards/rejected": -0.003706523682922125, + "step": 700 + }, + { + "epoch": 0.37, + "learning_rate": 4.991431431403148e-07, + "logits/chosen": 0.16423548758029938, + "logits/rejected": 0.24411818385124207, + "logps/chosen": -275.36859130859375, + "logps/rejected": -309.1749267578125, + "loss": 0.6886, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.00013561574451159686, + "rewards/margins": -0.0016555249458178878, + "rewards/rejected": 0.001791140646673739, + "step": 710 + }, + { + "epoch": 0.38, + "learning_rate": 4.99012588179407e-07, + "logits/chosen": 0.26659077405929565, + "logits/rejected": 0.2297864407300949, + "logps/chosen": -337.4370422363281, + "logps/rejected": -240.46920776367188, + "loss": 0.6887, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.0036331997253000736, + "rewards/margins": 0.010251880623400211, + "rewards/rejected": -0.006618679966777563, + "step": 720 + }, + { + "epoch": 0.38, + "learning_rate": 4.988727992444467e-07, + "logits/chosen": 0.4517492651939392, + "logits/rejected": 0.4217531085014343, + "logps/chosen": -292.5274353027344, + "logps/rejected": -297.12554931640625, + "loss": 0.6883, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.007517491467297077, + "rewards/margins": 0.007671818137168884, + "rewards/rejected": -0.0001543281105114147, + "step": 730 + }, + { + "epoch": 0.39, + "learning_rate": 4.98723781519137e-07, + "logits/chosen": 0.26132479310035706, + "logits/rejected": 0.28247007727622986, + "logps/chosen": -238.7399444580078, + "logps/rejected": -270.9232482910156, + "loss": 0.6891, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.0010348012438043952, + "rewards/margins": 0.005723314359784126, + "rewards/rejected": -0.0046885134652256966, + "step": 740 + }, + { + "epoch": 0.39, + "learning_rate": 4.98565540529407e-07, + "logits/chosen": 0.36184945702552795, + "logits/rejected": 0.33014923334121704, + "logps/chosen": -332.16546630859375, + "logps/rejected": -268.51068115234375, + "loss": 0.6885, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.002668363507837057, + "rewards/margins": 0.003941279370337725, + "rewards/rejected": -0.0012729157460853457, + "step": 750 + }, + { + "epoch": 0.4, + "learning_rate": 4.983980821432054e-07, + "logits/chosen": 0.3811706602573395, + "logits/rejected": 0.4038323760032654, + "logps/chosen": -318.3126220703125, + "logps/rejected": -300.29571533203125, + "loss": 0.6881, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.008891480974853039, + "rewards/margins": 0.014289113692939281, + "rewards/rejected": -0.005397631786763668, + "step": 760 + }, + { + "epoch": 0.4, + "learning_rate": 4.982214125702845e-07, + "logits/chosen": 0.37229424715042114, + "logits/rejected": 0.38184842467308044, + "logps/chosen": -255.6134796142578, + "logps/rejected": -256.50103759765625, + "loss": 0.6882, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.004782395903021097, + "rewards/margins": 0.011424211785197258, + "rewards/rejected": -0.0066418154165148735, + "step": 770 + }, + { + "epoch": 0.41, + "learning_rate": 4.980355383619684e-07, + "logits/chosen": 0.36553826928138733, + "logits/rejected": 0.33468884229660034, + "logps/chosen": -290.0172424316406, + "logps/rejected": -183.47616577148438, + "loss": 0.6886, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.004404271487146616, + "rewards/margins": 0.009764621965587139, + "rewards/rejected": -0.0053603509441018105, + "step": 780 + }, + { + "epoch": 0.41, + "learning_rate": 4.978404664109113e-07, + "logits/chosen": 0.3125520348548889, + "logits/rejected": 0.2413829267024994, + "logps/chosen": -377.26361083984375, + "logps/rejected": -280.342041015625, + "loss": 0.6873, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.006130435969680548, + "rewards/margins": 0.011444391682744026, + "rewards/rejected": -0.0053139557130634785, + "step": 790 + }, + { + "epoch": 0.42, + "learning_rate": 4.97636203950841e-07, + "logits/chosen": 0.3133540749549866, + "logits/rejected": 0.2622864246368408, + "logps/chosen": -335.50579833984375, + "logps/rejected": -279.72174072265625, + "loss": 0.688, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.011037923395633698, + "rewards/margins": 0.015714969485998154, + "rewards/rejected": -0.004677046090364456, + "step": 800 + }, + { + "epoch": 0.42, + "learning_rate": 4.974227585562916e-07, + "logits/chosen": 0.2856473922729492, + "logits/rejected": 0.21030691266059875, + "logps/chosen": -351.89117431640625, + "logps/rejected": -308.099365234375, + "loss": 0.6871, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.013937944546341896, + "rewards/margins": 0.019101398065686226, + "rewards/rejected": -0.005163452588021755, + "step": 810 + }, + { + "epoch": 0.43, + "learning_rate": 4.972001381423214e-07, + "logits/chosen": 0.38940927386283875, + "logits/rejected": 0.38752490282058716, + "logps/chosen": -377.74261474609375, + "logps/rejected": -341.78033447265625, + "loss": 0.6886, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.012508844025433064, + "rewards/margins": 0.011580301448702812, + "rewards/rejected": 0.0009285411797463894, + "step": 820 + }, + { + "epoch": 0.43, + "learning_rate": 4.969683509642206e-07, + "logits/chosen": 0.30638226866722107, + "logits/rejected": 0.33688345551490784, + "logps/chosen": -257.49383544921875, + "logps/rejected": -234.21389770507812, + "loss": 0.6864, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.0007948580896481872, + "rewards/margins": 0.0011654215632006526, + "rewards/rejected": -0.001960279420018196, + "step": 830 + }, + { + "epoch": 0.44, + "learning_rate": 4.967274056172044e-07, + "logits/chosen": 0.436321496963501, + "logits/rejected": 0.3269069790840149, + "logps/chosen": -264.94000244140625, + "logps/rejected": -235.1780242919922, + "loss": 0.6884, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.004068738780915737, + "rewards/margins": 0.012811449356377125, + "rewards/rejected": -0.008742708712816238, + "step": 840 + }, + { + "epoch": 0.44, + "learning_rate": 4.964773110360944e-07, + "logits/chosen": 0.3955819606781006, + "logits/rejected": 0.4442331790924072, + "logps/chosen": -211.5360107421875, + "logps/rejected": -221.88296508789062, + "loss": 0.6868, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.0063387490808963776, + "rewards/margins": 0.009760014712810516, + "rewards/rejected": -0.003421264933422208, + "step": 850 + }, + { + "epoch": 0.45, + "learning_rate": 4.962180764949876e-07, + "logits/chosen": 0.3990401327610016, + "logits/rejected": 0.3605530261993408, + "logps/chosen": -293.21966552734375, + "logps/rejected": -233.4999237060547, + "loss": 0.6885, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.008997827768325806, + "rewards/margins": 0.01379807572811842, + "rewards/rejected": -0.004800247959792614, + "step": 860 + }, + { + "epoch": 0.46, + "learning_rate": 4.959497116069122e-07, + "logits/chosen": 0.4126970171928406, + "logits/rejected": 0.306619793176651, + "logps/chosen": -291.76324462890625, + "logps/rejected": -283.7310791015625, + "loss": 0.6873, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.005910122767090797, + "rewards/margins": 0.011911979876458645, + "rewards/rejected": -0.0060018557123839855, + "step": 870 + }, + { + "epoch": 0.46, + "learning_rate": 4.956722263234711e-07, + "logits/chosen": 0.39139634370803833, + "logits/rejected": 0.36863797903060913, + "logps/chosen": -271.8345947265625, + "logps/rejected": -251.8036651611328, + "loss": 0.6869, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0030030894558876753, + "rewards/margins": 0.009311128407716751, + "rewards/rejected": -0.006308038718998432, + "step": 880 + }, + { + "epoch": 0.47, + "learning_rate": 4.95385630934473e-07, + "logits/chosen": 0.37733954191207886, + "logits/rejected": 0.3702600598335266, + "logps/chosen": -239.25918579101562, + "logps/rejected": -221.35079956054688, + "loss": 0.6881, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.0026471063029021025, + "rewards/margins": 0.005196579732000828, + "rewards/rejected": -0.002549473661929369, + "step": 890 + }, + { + "epoch": 0.47, + "learning_rate": 4.950899360675511e-07, + "logits/chosen": 0.42558950185775757, + "logits/rejected": 0.44111356139183044, + "logps/chosen": -251.04159545898438, + "logps/rejected": -266.29266357421875, + "loss": 0.6875, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.004267251584678888, + "rewards/margins": 0.00734188174828887, + "rewards/rejected": -0.003074630629271269, + "step": 900 + }, + { + "epoch": 0.48, + "learning_rate": 4.947851526877681e-07, + "logits/chosen": 0.3045174181461334, + "logits/rejected": 0.2997366189956665, + "logps/chosen": -311.9551696777344, + "logps/rejected": -285.98370361328125, + "loss": 0.6847, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.003712692065164447, + "rewards/margins": 0.0008770062704570591, + "rewards/rejected": 0.0028356860857456923, + "step": 910 + }, + { + "epoch": 0.48, + "learning_rate": 4.944712920972108e-07, + "logits/chosen": 0.34604376554489136, + "logits/rejected": 0.3185519278049469, + "logps/chosen": -317.5331115722656, + "logps/rejected": -317.90008544921875, + "loss": 0.6867, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.010279752314090729, + "rewards/margins": 0.013352531008422375, + "rewards/rejected": -0.0030727770645171404, + "step": 920 + }, + { + "epoch": 0.49, + "learning_rate": 4.9414836593457e-07, + "logits/chosen": 0.24341456592082977, + "logits/rejected": 0.33237552642822266, + "logps/chosen": -277.3923034667969, + "logps/rejected": -256.6651611328125, + "loss": 0.6847, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.0015904136234894395, + "rewards/margins": 0.01141836866736412, + "rewards/rejected": -0.013008782640099525, + "step": 930 + }, + { + "epoch": 0.49, + "learning_rate": 4.938163861747094e-07, + "logits/chosen": 0.37131667137145996, + "logits/rejected": 0.30167001485824585, + "logps/chosen": -365.1321716308594, + "logps/rejected": -231.3142547607422, + "loss": 0.6855, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.016867507249116898, + "rewards/margins": 0.02898784913122654, + "rewards/rejected": -0.012120342813432217, + "step": 940 + }, + { + "epoch": 0.5, + "learning_rate": 4.934753651282215e-07, + "logits/chosen": 0.34957343339920044, + "logits/rejected": 0.3650115132331848, + "logps/chosen": -249.6451873779297, + "logps/rejected": -238.812255859375, + "loss": 0.6888, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.0006063526379875839, + "rewards/margins": 0.00888439454138279, + "rewards/rejected": -0.008278042078018188, + "step": 950 + }, + { + "epoch": 0.5, + "learning_rate": 4.93125315440971e-07, + "logits/chosen": 0.28446871042251587, + "logits/rejected": 0.33393317461013794, + "logps/chosen": -221.7951202392578, + "logps/rejected": -222.3719940185547, + "loss": 0.6872, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.00030084262834861875, + "rewards/margins": 0.0024019861593842506, + "rewards/rejected": -0.002101143589243293, + "step": 960 + }, + { + "epoch": 0.51, + "learning_rate": 4.92766250093626e-07, + "logits/chosen": 0.3894736170768738, + "logits/rejected": 0.42692357301712036, + "logps/chosen": -278.0799560546875, + "logps/rejected": -277.92578125, + "loss": 0.6857, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.005217918194830418, + "rewards/margins": 0.01182524859905243, + "rewards/rejected": -0.006607329938560724, + "step": 970 + }, + { + "epoch": 0.51, + "learning_rate": 4.92398182401176e-07, + "logits/chosen": 0.36386704444885254, + "logits/rejected": 0.34401121735572815, + "logps/chosen": -272.7413330078125, + "logps/rejected": -258.5047607421875, + "loss": 0.6855, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.007803539279848337, + "rewards/margins": 0.012347839772701263, + "rewards/rejected": -0.004544299561530352, + "step": 980 + }, + { + "epoch": 0.52, + "learning_rate": 4.920211260124395e-07, + "logits/chosen": 0.31229716539382935, + "logits/rejected": 0.33428624272346497, + "logps/chosen": -271.045166015625, + "logps/rejected": -287.309326171875, + "loss": 0.6865, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.011570685543119907, + "rewards/margins": 0.013975699432194233, + "rewards/rejected": -0.002405013656243682, + "step": 990 + }, + { + "epoch": 0.52, + "learning_rate": 4.916350949095566e-07, + "logits/chosen": 0.35035276412963867, + "logits/rejected": 0.3598233163356781, + "logps/chosen": -321.33612060546875, + "logps/rejected": -259.4188537597656, + "loss": 0.6868, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.012103838846087456, + "rewards/margins": 0.023051228374242783, + "rewards/rejected": -0.010947388596832752, + "step": 1000 + }, + { + "epoch": 0.53, + "learning_rate": 4.912401034074708e-07, + "logits/chosen": 0.3462555408477783, + "logits/rejected": 0.35642725229263306, + "logps/chosen": -245.738525390625, + "logps/rejected": -185.628173828125, + "loss": 0.6847, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.0006697209319099784, + "rewards/margins": 0.014949485659599304, + "rewards/rejected": -0.014279766008257866, + "step": 1010 + }, + { + "epoch": 0.53, + "learning_rate": 4.908361661533989e-07, + "logits/chosen": 0.2858627438545227, + "logits/rejected": 0.2974536120891571, + "logps/chosen": -314.45263671875, + "logps/rejected": -301.51995849609375, + "loss": 0.6851, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.004904334433376789, + "rewards/margins": 0.014790430665016174, + "rewards/rejected": -0.009886096231639385, + "step": 1020 + }, + { + "epoch": 0.54, + "learning_rate": 4.904232981262866e-07, + "logits/chosen": 0.3912216126918793, + "logits/rejected": 0.3437424898147583, + "logps/chosen": -318.65496826171875, + "logps/rejected": -290.3804931640625, + "loss": 0.6842, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.014556080102920532, + "rewards/margins": 0.027198363095521927, + "rewards/rejected": -0.012642279267311096, + "step": 1030 + }, + { + "epoch": 0.54, + "learning_rate": 4.900015146362544e-07, + "logits/chosen": 0.32560548186302185, + "logits/rejected": 0.32246512174606323, + "logps/chosen": -285.6897277832031, + "logps/rejected": -261.7762145996094, + "loss": 0.6845, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 0.013799789361655712, + "rewards/margins": 0.030853647738695145, + "rewards/rejected": -0.017053861171007156, + "step": 1040 + }, + { + "epoch": 0.55, + "learning_rate": 4.895708313240285e-07, + "logits/chosen": 0.34826239943504333, + "logits/rejected": 0.36765581369400024, + "logps/chosen": -280.7046813964844, + "logps/rejected": -252.820556640625, + "loss": 0.6842, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0033273245207965374, + "rewards/margins": 0.010536163114011288, + "rewards/rejected": -0.007208839058876038, + "step": 1050 + }, + { + "epoch": 0.55, + "learning_rate": 4.891312641603623e-07, + "logits/chosen": 0.3264276087284088, + "logits/rejected": 0.3249642252922058, + "logps/chosen": -367.77685546875, + "logps/rejected": -318.3257751464844, + "loss": 0.6841, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.017363909631967545, + "rewards/margins": 0.02406318113207817, + "rewards/rejected": -0.00669927429407835, + "step": 1060 + }, + { + "epoch": 0.56, + "learning_rate": 4.886828294454426e-07, + "logits/chosen": 0.3919423222541809, + "logits/rejected": 0.3063061833381653, + "logps/chosen": -351.09088134765625, + "logps/rejected": -332.01043701171875, + "loss": 0.6845, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.009391558356583118, + "rewards/margins": 0.020744940266013145, + "rewards/rejected": -0.011353382840752602, + "step": 1070 + }, + { + "epoch": 0.57, + "learning_rate": 4.882255438082863e-07, + "logits/chosen": 0.45179158449172974, + "logits/rejected": 0.41871339082717896, + "logps/chosen": -283.4506530761719, + "logps/rejected": -272.11578369140625, + "loss": 0.6848, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.012173324823379517, + "rewards/margins": 0.019558671861886978, + "rewards/rejected": -0.007385346107184887, + "step": 1080 + }, + { + "epoch": 0.57, + "learning_rate": 4.877594242061233e-07, + "logits/chosen": 0.32700738310813904, + "logits/rejected": 0.3105686604976654, + "logps/chosen": -324.06292724609375, + "logps/rejected": -242.7261199951172, + "loss": 0.6857, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0017802860820665956, + "rewards/margins": 0.01658160611987114, + "rewards/rejected": -0.014801318757236004, + "step": 1090 + }, + { + "epoch": 0.58, + "learning_rate": 4.87284487923768e-07, + "logits/chosen": 0.3384407162666321, + "logits/rejected": 0.2847151458263397, + "logps/chosen": -283.078857421875, + "logps/rejected": -243.68801879882812, + "loss": 0.6847, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.01340520940721035, + "rewards/margins": 0.025466054677963257, + "rewards/rejected": -0.012060845270752907, + "step": 1100 + }, + { + "epoch": 0.58, + "learning_rate": 4.868007525729775e-07, + "logits/chosen": 0.39033299684524536, + "logits/rejected": 0.3908047676086426, + "logps/chosen": -270.23675537109375, + "logps/rejected": -233.9051971435547, + "loss": 0.6853, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.00840029213577509, + "rewards/margins": 0.017927926033735275, + "rewards/rejected": -0.00952763482928276, + "step": 1110 + }, + { + "epoch": 0.59, + "learning_rate": 4.863082360917998e-07, + "logits/chosen": 0.4334394335746765, + "logits/rejected": 0.37194570899009705, + "logps/chosen": -301.4029846191406, + "logps/rejected": -237.0880126953125, + "loss": 0.6815, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.008443480357527733, + "rewards/margins": 0.023213211447000504, + "rewards/rejected": -0.014769727364182472, + "step": 1120 + }, + { + "epoch": 0.59, + "learning_rate": 4.858069567439072e-07, + "logits/chosen": 0.33737045526504517, + "logits/rejected": 0.335934579372406, + "logps/chosen": -253.21676635742188, + "logps/rejected": -264.36468505859375, + "loss": 0.6867, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.003204932902008295, + "rewards/margins": 0.007723759859800339, + "rewards/rejected": -0.004518826492130756, + "step": 1130 + }, + { + "epoch": 0.6, + "learning_rate": 4.852969331179206e-07, + "logits/chosen": 0.31442832946777344, + "logits/rejected": 0.2894567549228668, + "logps/chosen": -292.9564208984375, + "logps/rejected": -292.08966064453125, + "loss": 0.6845, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.007902948185801506, + "rewards/margins": 0.018108747899532318, + "rewards/rejected": -0.010205797851085663, + "step": 1140 + }, + { + "epoch": 0.6, + "learning_rate": 4.847781841267185e-07, + "logits/chosen": 0.3472171425819397, + "logits/rejected": 0.30964428186416626, + "logps/chosen": -306.6994934082031, + "logps/rejected": -229.86032104492188, + "loss": 0.6832, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.011110781691968441, + "rewards/margins": 0.023439984768629074, + "rewards/rejected": -0.012329204939305782, + "step": 1150 + }, + { + "epoch": 0.61, + "learning_rate": 4.842507290067374e-07, + "logits/chosen": 0.3105614483356476, + "logits/rejected": 0.2698826789855957, + "logps/chosen": -256.3908386230469, + "logps/rejected": -250.67025756835938, + "loss": 0.6865, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.004344923421740532, + "rewards/margins": 0.013838335871696472, + "rewards/rejected": -0.018183257430791855, + "step": 1160 + }, + { + "epoch": 0.61, + "learning_rate": 4.837145873172567e-07, + "logits/chosen": 0.3267471194267273, + "logits/rejected": 0.3460347354412079, + "logps/chosen": -251.1807861328125, + "logps/rejected": -216.62417602539062, + "loss": 0.6838, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.0053130751475691795, + "rewards/margins": 0.009674773551523685, + "rewards/rejected": -0.014987850561738014, + "step": 1170 + }, + { + "epoch": 0.62, + "learning_rate": 4.83169778939675e-07, + "logits/chosen": 0.3920978903770447, + "logits/rejected": 0.4109751284122467, + "logps/chosen": -290.7029724121094, + "logps/rejected": -280.5768127441406, + "loss": 0.6814, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.006850590463727713, + "rewards/margins": 0.013157842680811882, + "rewards/rejected": -0.006307253148406744, + "step": 1180 + }, + { + "epoch": 0.62, + "learning_rate": 4.826163240767716e-07, + "logits/chosen": 0.41425761580467224, + "logits/rejected": 0.3763046860694885, + "logps/chosen": -382.89776611328125, + "logps/rejected": -345.54742431640625, + "loss": 0.6838, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.02900487184524536, + "rewards/margins": 0.03568483516573906, + "rewards/rejected": -0.006679965648800135, + "step": 1190 + }, + { + "epoch": 0.63, + "learning_rate": 4.820542432519584e-07, + "logits/chosen": 0.28830844163894653, + "logits/rejected": 0.341747522354126, + "logps/chosen": -325.1733703613281, + "logps/rejected": -350.9265441894531, + "loss": 0.686, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.00515395263209939, + "rewards/margins": 0.01350557804107666, + "rewards/rejected": -0.008351625874638557, + "step": 1200 + }, + { + "epoch": 0.63, + "learning_rate": 4.814835573085176e-07, + "logits/chosen": 0.4174725115299225, + "logits/rejected": 0.4214004874229431, + "logps/chosen": -245.7711944580078, + "logps/rejected": -214.4864959716797, + "loss": 0.6826, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.003640042617917061, + "rewards/margins": 0.016179818660020828, + "rewards/rejected": -0.012539774179458618, + "step": 1210 + }, + { + "epoch": 0.64, + "learning_rate": 4.809042874088304e-07, + "logits/chosen": 0.353881299495697, + "logits/rejected": 0.2757090926170349, + "logps/chosen": -321.21514892578125, + "logps/rejected": -249.38925170898438, + "loss": 0.683, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.009076332673430443, + "rewards/margins": 0.027345478534698486, + "rewards/rejected": -0.018269145861268044, + "step": 1220 + }, + { + "epoch": 0.64, + "learning_rate": 4.803164550335905e-07, + "logits/chosen": 0.4418944716453552, + "logits/rejected": 0.39826610684394836, + "logps/chosen": -257.59466552734375, + "logps/rejected": -207.9666290283203, + "loss": 0.6843, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.007926455698907375, + "rewards/margins": 0.021452203392982483, + "rewards/rejected": -0.013525748625397682, + "step": 1230 + }, + { + "epoch": 0.65, + "learning_rate": 4.797200819810089e-07, + "logits/chosen": 0.28744739294052124, + "logits/rejected": 0.2932237386703491, + "logps/chosen": -343.52764892578125, + "logps/rejected": -286.6883850097656, + "loss": 0.6801, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.024472234770655632, + "rewards/margins": 0.036373551934957504, + "rewards/rejected": -0.011901318095624447, + "step": 1240 + }, + { + "epoch": 0.65, + "learning_rate": 4.79115190366005e-07, + "logits/chosen": 0.428488165140152, + "logits/rejected": 0.39333200454711914, + "logps/chosen": -311.669921875, + "logps/rejected": -284.22674560546875, + "loss": 0.6825, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 0.014714357443153858, + "rewards/margins": 0.025674622505903244, + "rewards/rejected": -0.010960264131426811, + "step": 1250 + }, + { + "epoch": 0.66, + "learning_rate": 4.785018026193862e-07, + "logits/chosen": 0.28775107860565186, + "logits/rejected": 0.279806911945343, + "logps/chosen": -261.11627197265625, + "logps/rejected": -251.590087890625, + "loss": 0.6835, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.0010452417191118002, + "rewards/margins": 0.009592377580702305, + "rewards/rejected": -0.010637620463967323, + "step": 1260 + }, + { + "epoch": 0.66, + "learning_rate": 4.77879941487017e-07, + "logits/chosen": 0.30096435546875, + "logits/rejected": 0.30046865344047546, + "logps/chosen": -263.289306640625, + "logps/rejected": -222.4429473876953, + "loss": 0.6828, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.012327324599027634, + "rewards/margins": 0.0253997091203928, + "rewards/rejected": -0.013072386384010315, + "step": 1270 + }, + { + "epoch": 0.67, + "learning_rate": 4.772496300289748e-07, + "logits/chosen": 0.4783431887626648, + "logits/rejected": 0.4230971336364746, + "logps/chosen": -283.7425842285156, + "logps/rejected": -240.39730834960938, + "loss": 0.6814, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.013720624148845673, + "rewards/margins": 0.03198980540037155, + "rewards/rejected": -0.018269184976816177, + "step": 1280 + }, + { + "epoch": 0.68, + "learning_rate": 4.766108916186949e-07, + "logits/chosen": 0.21571576595306396, + "logits/rejected": 0.2778613567352295, + "logps/chosen": -341.9070129394531, + "logps/rejected": -331.6284484863281, + "loss": 0.6829, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.0014383181696757674, + "rewards/margins": 0.017966976389288902, + "rewards/rejected": -0.0165286585688591, + "step": 1290 + }, + { + "epoch": 0.68, + "learning_rate": 4.759637499421042e-07, + "logits/chosen": 0.303109347820282, + "logits/rejected": 0.30239337682724, + "logps/chosen": -323.6648864746094, + "logps/rejected": -240.1118927001953, + "loss": 0.6771, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.026541758328676224, + "rewards/margins": 0.03878582641482353, + "rewards/rejected": -0.012244068086147308, + "step": 1300 + }, + { + "epoch": 0.69, + "learning_rate": 4.7530822899674207e-07, + "logits/chosen": 0.3453063666820526, + "logits/rejected": 0.2573213577270508, + "logps/chosen": -351.45123291015625, + "logps/rejected": -256.66986083984375, + "loss": 0.6843, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.0206809863448143, + "rewards/margins": 0.021090159192681313, + "rewards/rejected": -0.00040917136357165873, + "step": 1310 + }, + { + "epoch": 0.69, + "learning_rate": 4.7464435309087137e-07, + "logits/chosen": 0.42679348587989807, + "logits/rejected": 0.3228249251842499, + "logps/chosen": -314.97119140625, + "logps/rejected": -237.63192749023438, + "loss": 0.6836, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.004733038134872913, + "rewards/margins": 0.035116709768772125, + "rewards/rejected": -0.030383672565221786, + "step": 1320 + }, + { + "epoch": 0.7, + "learning_rate": 4.739721468425763e-07, + "logits/chosen": 0.31200721859931946, + "logits/rejected": 0.30873575806617737, + "logps/chosen": -308.97442626953125, + "logps/rejected": -295.93328857421875, + "loss": 0.6823, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.008293518796563148, + "rewards/margins": 0.021679170429706573, + "rewards/rejected": -0.013385653495788574, + "step": 1330 + }, + { + "epoch": 0.7, + "learning_rate": 4.7329163517885e-07, + "logits/chosen": 0.34352797269821167, + "logits/rejected": 0.34060853719711304, + "logps/chosen": -312.0477600097656, + "logps/rejected": -310.62890625, + "loss": 0.6805, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.020096898078918457, + "rewards/margins": 0.03874671831727028, + "rewards/rejected": -0.018649814650416374, + "step": 1340 + }, + { + "epoch": 0.71, + "learning_rate": 4.7260284333466973e-07, + "logits/chosen": 0.370521605014801, + "logits/rejected": 0.36052078008651733, + "logps/chosen": -276.4061279296875, + "logps/rejected": -250.52090454101562, + "loss": 0.6838, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": 0.002161466982215643, + "rewards/margins": 0.0014650661032646894, + "rewards/rejected": 0.0006964011117815971, + "step": 1350 + }, + { + "epoch": 0.71, + "learning_rate": 4.719057968520617e-07, + "logits/chosen": 0.34359192848205566, + "logits/rejected": 0.3505293130874634, + "logps/chosen": -275.33087158203125, + "logps/rejected": -249.4975128173828, + "loss": 0.681, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": 0.006505683064460754, + "rewards/margins": 0.008261348120868206, + "rewards/rejected": -0.001755664823576808, + "step": 1360 + }, + { + "epoch": 0.72, + "learning_rate": 4.7120052157915345e-07, + "logits/chosen": 0.2927667796611786, + "logits/rejected": 0.33267074823379517, + "logps/chosen": -290.85272216796875, + "logps/rejected": -285.813720703125, + "loss": 0.6777, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.006295072380453348, + "rewards/margins": 0.025114480406045914, + "rewards/rejected": -0.018819406628608704, + "step": 1370 + }, + { + "epoch": 0.72, + "learning_rate": 4.7048704366921537e-07, + "logits/chosen": 0.4149307310581207, + "logits/rejected": 0.4059115946292877, + "logps/chosen": -220.0292205810547, + "logps/rejected": -171.5416717529297, + "loss": 0.6849, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.000204659256269224, + "rewards/margins": 0.025315681472420692, + "rewards/rejected": -0.025111019611358643, + "step": 1380 + }, + { + "epoch": 0.73, + "learning_rate": 4.6976538957969114e-07, + "logits/chosen": 0.496499240398407, + "logits/rejected": 0.4356865882873535, + "logps/chosen": -280.1456298828125, + "logps/rejected": -193.3099365234375, + "loss": 0.68, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.017452292144298553, + "rewards/margins": 0.04280317574739456, + "rewards/rejected": -0.02535087987780571, + "step": 1390 + }, + { + "epoch": 0.73, + "learning_rate": 4.690355860712163e-07, + "logits/chosen": 0.46831613779067993, + "logits/rejected": 0.49073824286460876, + "logps/chosen": -228.4839324951172, + "logps/rejected": -255.85720825195312, + "loss": 0.6817, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0025743378791958094, + "rewards/margins": 0.01455251406878233, + "rewards/rejected": -0.01712685264647007, + "step": 1400 + }, + { + "epoch": 0.74, + "learning_rate": 4.682976602066262e-07, + "logits/chosen": 0.3690539002418518, + "logits/rejected": 0.3545624613761902, + "logps/chosen": -252.1142578125, + "logps/rejected": -207.72763061523438, + "loss": 0.6794, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.0002451833279337734, + "rewards/margins": 0.024905264377593994, + "rewards/rejected": -0.025150448083877563, + "step": 1410 + }, + { + "epoch": 0.74, + "learning_rate": 4.6755163934995224e-07, + "logits/chosen": 0.3355734646320343, + "logits/rejected": 0.33245348930358887, + "logps/chosen": -323.6839599609375, + "logps/rejected": -270.827392578125, + "loss": 0.6787, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.008448930457234383, + "rewards/margins": 0.024179551750421524, + "rewards/rejected": -0.01573062129318714, + "step": 1420 + }, + { + "epoch": 0.75, + "learning_rate": 4.667975511654072e-07, + "logits/chosen": 0.38984426856040955, + "logits/rejected": 0.3442505896091461, + "logps/chosen": -336.49505615234375, + "logps/rejected": -267.2830810546875, + "loss": 0.6803, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.005654610693454742, + "rewards/margins": 0.03083883225917816, + "rewards/rejected": -0.02518421970307827, + "step": 1430 + }, + { + "epoch": 0.75, + "learning_rate": 4.660354236163595e-07, + "logits/chosen": 0.29653918743133545, + "logits/rejected": 0.3196846544742584, + "logps/chosen": -326.2195129394531, + "logps/rejected": -311.04278564453125, + "loss": 0.6814, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.012937399558722973, + "rewards/margins": 0.027429040521383286, + "rewards/rejected": -0.014491640031337738, + "step": 1440 + }, + { + "epoch": 0.76, + "learning_rate": 4.6526528496429606e-07, + "logits/chosen": 0.3130524456501007, + "logits/rejected": 0.2565365433692932, + "logps/chosen": -358.9948425292969, + "logps/rejected": -256.02764892578125, + "loss": 0.6776, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.022862132638692856, + "rewards/margins": 0.057194191962480545, + "rewards/rejected": -0.03433205932378769, + "step": 1450 + }, + { + "epoch": 0.76, + "learning_rate": 4.644871637677745e-07, + "logits/chosen": 0.33667245507240295, + "logits/rejected": 0.28259915113449097, + "logps/chosen": -315.0734558105469, + "logps/rejected": -243.84463500976562, + "loss": 0.68, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.008300681598484516, + "rewards/margins": 0.01949361525475979, + "rewards/rejected": -0.011192934587597847, + "step": 1460 + }, + { + "epoch": 0.77, + "learning_rate": 4.637010888813638e-07, + "logits/chosen": 0.40810179710388184, + "logits/rejected": 0.24761733412742615, + "logps/chosen": -432.92425537109375, + "logps/rejected": -285.3038330078125, + "loss": 0.6774, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.04085806757211685, + "rewards/margins": 0.07843003422021866, + "rewards/rejected": -0.037571974098682404, + "step": 1470 + }, + { + "epoch": 0.77, + "learning_rate": 4.6290708945457493e-07, + "logits/chosen": 0.3285070061683655, + "logits/rejected": 0.3192768096923828, + "logps/chosen": -324.9331359863281, + "logps/rejected": -330.42132568359375, + "loss": 0.6777, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.00941525213420391, + "rewards/margins": 0.016397925093770027, + "rewards/rejected": -0.00698267063125968, + "step": 1480 + }, + { + "epoch": 0.78, + "learning_rate": 4.6210519493077887e-07, + "logits/chosen": 0.41080838441848755, + "logits/rejected": 0.396353155374527, + "logps/chosen": -300.5375061035156, + "logps/rejected": -276.6629943847656, + "loss": 0.6791, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.016803177073597908, + "rewards/margins": 0.0380239263176918, + "rewards/rejected": -0.021220751106739044, + "step": 1490 + }, + { + "epoch": 0.78, + "learning_rate": 4.6129543504611607e-07, + "logits/chosen": 0.31903237104415894, + "logits/rejected": 0.35140854120254517, + "logps/chosen": -290.30255126953125, + "logps/rejected": -328.60443115234375, + "loss": 0.6779, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.006040631793439388, + "rewards/margins": 0.028903204947710037, + "rewards/rejected": -0.022862572222948074, + "step": 1500 + }, + { + "epoch": 0.79, + "learning_rate": 4.604778398283927e-07, + "logits/chosen": 0.3273167014122009, + "logits/rejected": 0.2575587034225464, + "logps/chosen": -274.8236389160156, + "logps/rejected": -232.8859405517578, + "loss": 0.6789, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.0010220941621810198, + "rewards/margins": 0.028309160843491554, + "rewards/rejected": -0.029331251978874207, + "step": 1510 + }, + { + "epoch": 0.8, + "learning_rate": 4.596524395959678e-07, + "logits/chosen": 0.4078959822654724, + "logits/rejected": 0.3541221618652344, + "logps/chosen": -306.6305847167969, + "logps/rejected": -306.5215148925781, + "loss": 0.6773, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": 0.018020112067461014, + "rewards/margins": 0.02500280737876892, + "rewards/rejected": -0.006982696708291769, + "step": 1520 + }, + { + "epoch": 0.8, + "learning_rate": 4.588192649566285e-07, + "logits/chosen": 0.26278024911880493, + "logits/rejected": 0.31725552678108215, + "logps/chosen": -382.01214599609375, + "logps/rejected": -314.90850830078125, + "loss": 0.6788, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.0021470137871801853, + "rewards/margins": 0.039449065923690796, + "rewards/rejected": -0.04159608483314514, + "step": 1530 + }, + { + "epoch": 0.81, + "learning_rate": 4.5797834680645553e-07, + "logits/chosen": 0.2967289984226227, + "logits/rejected": 0.32962074875831604, + "logps/chosen": -374.12322998046875, + "logps/rejected": -369.6998291015625, + "loss": 0.6792, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.03092324174940586, + "rewards/margins": 0.03554985299706459, + "rewards/rejected": -0.004626607988029718, + "step": 1540 + }, + { + "epoch": 0.81, + "learning_rate": 4.5712971632867715e-07, + "logits/chosen": 0.4084998071193695, + "logits/rejected": 0.417635977268219, + "logps/chosen": -211.31640625, + "logps/rejected": -183.09400939941406, + "loss": 0.6775, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.00762952771037817, + "rewards/margins": 0.016260989010334015, + "rewards/rejected": -0.02389051578938961, + "step": 1550 + }, + { + "epoch": 0.82, + "learning_rate": 4.562734049925129e-07, + "logits/chosen": 0.3083654046058655, + "logits/rejected": 0.33990222215652466, + "logps/chosen": -249.00332641601562, + "logps/rejected": -260.27191162109375, + "loss": 0.6801, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.017335115000605583, + "rewards/margins": -0.0029615708626806736, + "rewards/rejected": -0.014373543672263622, + "step": 1560 + }, + { + "epoch": 0.82, + "learning_rate": 4.5540944455200663e-07, + "logits/chosen": 0.29243311285972595, + "logits/rejected": 0.38317304849624634, + "logps/chosen": -291.7667541503906, + "logps/rejected": -305.8726806640625, + "loss": 0.6806, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0014521919656544924, + "rewards/margins": 0.01298120804131031, + "rewards/rejected": -0.014433401636779308, + "step": 1570 + }, + { + "epoch": 0.83, + "learning_rate": 4.545378670448492e-07, + "logits/chosen": 0.3252048194408417, + "logits/rejected": 0.3160475194454193, + "logps/chosen": -308.96282958984375, + "logps/rejected": -296.4386291503906, + "loss": 0.6814, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.011841426603496075, + "rewards/margins": 0.03404033184051514, + "rewards/rejected": -0.022198904305696487, + "step": 1580 + }, + { + "epoch": 0.83, + "learning_rate": 4.5365870479119014e-07, + "logits/chosen": 0.3502393662929535, + "logits/rejected": 0.3742118775844574, + "logps/chosen": -343.4779357910156, + "logps/rejected": -285.8212890625, + "loss": 0.6773, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.00474547827616334, + "rewards/margins": 0.039683885872364044, + "rewards/rejected": -0.034938402473926544, + "step": 1590 + }, + { + "epoch": 0.84, + "learning_rate": 4.5277199039243917e-07, + "logits/chosen": 0.40031924843788147, + "logits/rejected": 0.3826026916503906, + "logps/chosen": -381.2270812988281, + "logps/rejected": -255.69287109375, + "loss": 0.6751, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.010327344760298729, + "rewards/margins": 0.037659358233213425, + "rewards/rejected": -0.027332013472914696, + "step": 1600 + }, + { + "epoch": 0.84, + "learning_rate": 4.5187775673005744e-07, + "logits/chosen": 0.3817320764064789, + "logits/rejected": 0.32918184995651245, + "logps/chosen": -294.9057922363281, + "logps/rejected": -206.7053985595703, + "loss": 0.6745, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 0.0099642314016819, + "rewards/margins": 0.04299734905362129, + "rewards/rejected": -0.03303311765193939, + "step": 1610 + }, + { + "epoch": 0.85, + "learning_rate": 4.509760369643384e-07, + "logits/chosen": 0.3825025260448456, + "logits/rejected": 0.42483648657798767, + "logps/chosen": -308.7125549316406, + "logps/rejected": -309.3365173339844, + "loss": 0.6774, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.003090689657256007, + "rewards/margins": 0.006784576922655106, + "rewards/rejected": -0.009875266812741756, + "step": 1620 + }, + { + "epoch": 0.85, + "learning_rate": 4.5006686453317734e-07, + "logits/chosen": 0.3839607536792755, + "logits/rejected": 0.3535715937614441, + "logps/chosen": -339.5386047363281, + "logps/rejected": -244.91543579101562, + "loss": 0.6797, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.0017743610078468919, + "rewards/margins": 0.03925933688879013, + "rewards/rejected": -0.037484973669052124, + "step": 1630 + }, + { + "epoch": 0.86, + "learning_rate": 4.4915027315083243e-07, + "logits/chosen": 0.2991335988044739, + "logits/rejected": 0.2891019582748413, + "logps/chosen": -304.2152099609375, + "logps/rejected": -264.15185546875, + "loss": 0.6802, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.014160381630063057, + "rewards/margins": 0.05216909572482109, + "rewards/rejected": -0.038008708506822586, + "step": 1640 + }, + { + "epoch": 0.86, + "learning_rate": 4.482262968066737e-07, + "logits/chosen": 0.32558315992355347, + "logits/rejected": 0.33039188385009766, + "logps/chosen": -271.199462890625, + "logps/rejected": -319.0653381347656, + "loss": 0.679, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.000756078225094825, + "rewards/margins": 0.031729746609926224, + "rewards/rejected": -0.03097366914153099, + "step": 1650 + }, + { + "epoch": 0.87, + "learning_rate": 4.4729496976392324e-07, + "logits/chosen": 0.34333691000938416, + "logits/rejected": 0.4006190299987793, + "logps/chosen": -281.2134094238281, + "logps/rejected": -294.54595947265625, + "loss": 0.6751, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.00261726719327271, + "rewards/margins": 0.031192597001791, + "rewards/rejected": -0.02857532724738121, + "step": 1660 + }, + { + "epoch": 0.87, + "learning_rate": 4.463563265583843e-07, + "logits/chosen": 0.29203808307647705, + "logits/rejected": 0.31420475244522095, + "logps/chosen": -298.7978515625, + "logps/rejected": -272.68255615234375, + "loss": 0.6754, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0030163261108100414, + "rewards/margins": 0.013660850934684277, + "rewards/rejected": -0.010644523426890373, + "step": 1670 + }, + { + "epoch": 0.88, + "learning_rate": 4.4541040199716063e-07, + "logits/chosen": 0.30822497606277466, + "logits/rejected": 0.2724303603172302, + "logps/chosen": -269.5755920410156, + "logps/rejected": -256.20172119140625, + "loss": 0.6808, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.009109275415539742, + "rewards/margins": 0.013653385452926159, + "rewards/rejected": -0.022762665525078773, + "step": 1680 + }, + { + "epoch": 0.88, + "learning_rate": 4.4445723115736587e-07, + "logits/chosen": 0.33946898579597473, + "logits/rejected": 0.2928759455680847, + "logps/chosen": -351.9305419921875, + "logps/rejected": -251.12399291992188, + "loss": 0.6756, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.007805541157722473, + "rewards/margins": 0.055908508598804474, + "rewards/rejected": -0.0481029637157917, + "step": 1690 + }, + { + "epoch": 0.89, + "learning_rate": 4.434968493848228e-07, + "logits/chosen": 0.4504426419734955, + "logits/rejected": 0.37384623289108276, + "logps/chosen": -277.2381286621094, + "logps/rejected": -238.30520629882812, + "loss": 0.6752, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.002321247011423111, + "rewards/margins": 0.023793473839759827, + "rewards/rejected": -0.021472224965691566, + "step": 1700 + }, + { + "epoch": 0.89, + "learning_rate": 4.425292922927525e-07, + "logits/chosen": 0.383585125207901, + "logits/rejected": 0.28925397992134094, + "logps/chosen": -257.552734375, + "logps/rejected": -234.51431274414062, + "loss": 0.6798, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 0.002572057768702507, + "rewards/margins": 0.04384605213999748, + "rewards/rejected": -0.041273992508649826, + "step": 1710 + }, + { + "epoch": 0.9, + "learning_rate": 4.41554595760454e-07, + "logits/chosen": 0.284583181142807, + "logits/rejected": 0.25669267773628235, + "logps/chosen": -370.1746826171875, + "logps/rejected": -323.588623046875, + "loss": 0.6772, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.015700947493314743, + "rewards/margins": 0.042133111506700516, + "rewards/rejected": -0.026432165876030922, + "step": 1720 + }, + { + "epoch": 0.91, + "learning_rate": 4.4057279593197326e-07, + "logits/chosen": 0.3870350122451782, + "logits/rejected": 0.39999377727508545, + "logps/chosen": -249.8700714111328, + "logps/rejected": -261.95843505859375, + "loss": 0.6768, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.001268685213290155, + "rewards/margins": 0.021950436756014824, + "rewards/rejected": -0.023219123482704163, + "step": 1730 + }, + { + "epoch": 0.91, + "learning_rate": 4.395839292147637e-07, + "logits/chosen": 0.3090742230415344, + "logits/rejected": 0.30543801188468933, + "logps/chosen": -321.3290100097656, + "logps/rejected": -286.99774169921875, + "loss": 0.676, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.024974200874567032, + "rewards/margins": 0.06467586010694504, + "rewards/rejected": -0.039701662957668304, + "step": 1740 + }, + { + "epoch": 0.92, + "learning_rate": 4.3858803227833526e-07, + "logits/chosen": 0.24626950919628143, + "logits/rejected": 0.24712149798870087, + "logps/chosen": -372.52020263671875, + "logps/rejected": -345.7261657714844, + "loss": 0.6744, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.021267516538500786, + "rewards/margins": 0.05029396340250969, + "rewards/rejected": -0.029026448726654053, + "step": 1750 + }, + { + "epoch": 0.92, + "learning_rate": 4.375851420528951e-07, + "logits/chosen": 0.32322531938552856, + "logits/rejected": 0.34263914823532104, + "logps/chosen": -288.7018737792969, + "logps/rejected": -257.0304260253906, + "loss": 0.6752, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0024817243684083223, + "rewards/margins": 0.02616637572646141, + "rewards/rejected": -0.028648102656006813, + "step": 1760 + }, + { + "epoch": 0.93, + "learning_rate": 4.36575295727978e-07, + "logits/chosen": 0.38421711325645447, + "logits/rejected": 0.41225776076316833, + "logps/chosen": -309.842529296875, + "logps/rejected": -314.6719665527344, + "loss": 0.6796, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.009599078446626663, + "rewards/margins": 0.052566200494766235, + "rewards/rejected": -0.042967118322849274, + "step": 1770 + }, + { + "epoch": 0.93, + "learning_rate": 4.355585307510675e-07, + "logits/chosen": 0.391152948141098, + "logits/rejected": 0.35352757573127747, + "logps/chosen": -262.3291320800781, + "logps/rejected": -261.0271301269531, + "loss": 0.6768, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.017994780093431473, + "rewards/margins": 0.007160873152315617, + "rewards/rejected": -0.025155652314424515, + "step": 1780 + }, + { + "epoch": 0.94, + "learning_rate": 4.345348848262068e-07, + "logits/chosen": 0.36389559507369995, + "logits/rejected": 0.39531487226486206, + "logps/chosen": -333.47760009765625, + "logps/rejected": -261.5639953613281, + "loss": 0.6794, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.007003250531852245, + "rewards/margins": 0.015884755179286003, + "rewards/rejected": -0.022888006642460823, + "step": 1790 + }, + { + "epoch": 0.94, + "learning_rate": 4.33504395912601e-07, + "logits/chosen": 0.28872233629226685, + "logits/rejected": 0.25540515780448914, + "logps/chosen": -337.00360107421875, + "logps/rejected": -328.2117004394531, + "loss": 0.6764, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.0031368457712233067, + "rewards/margins": 0.034987322986125946, + "rewards/rejected": -0.03185047581791878, + "step": 1800 + }, + { + "epoch": 0.95, + "learning_rate": 4.324671022232095e-07, + "logits/chosen": 0.2700476050376892, + "logits/rejected": 0.34975817799568176, + "logps/chosen": -226.2433319091797, + "logps/rejected": -245.605712890625, + "loss": 0.6731, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.019788941368460655, + "rewards/margins": 0.02122773975133896, + "rewards/rejected": -0.04101668298244476, + "step": 1810 + }, + { + "epoch": 0.95, + "learning_rate": 4.314230422233286e-07, + "logits/chosen": 0.367465615272522, + "logits/rejected": 0.34759631752967834, + "logps/chosen": -327.2635498046875, + "logps/rejected": -291.80902099609375, + "loss": 0.671, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.021458884701132774, + "rewards/margins": 0.05451428145170212, + "rewards/rejected": -0.033055394887924194, + "step": 1820 + }, + { + "epoch": 0.96, + "learning_rate": 4.303722546291655e-07, + "logits/chosen": 0.2959325611591339, + "logits/rejected": 0.25788697600364685, + "logps/chosen": -288.8806457519531, + "logps/rejected": -267.59320068359375, + "loss": 0.6719, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.005892341025173664, + "rewards/margins": 0.03235545754432678, + "rewards/rejected": -0.03824779763817787, + "step": 1830 + }, + { + "epoch": 0.96, + "learning_rate": 4.2931477840640243e-07, + "logits/chosen": 0.30159324407577515, + "logits/rejected": 0.324674516916275, + "logps/chosen": -312.8402404785156, + "logps/rejected": -270.1824645996094, + "loss": 0.6732, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.010435075499117374, + "rewards/margins": 0.05785765126347542, + "rewards/rejected": -0.04742256924510002, + "step": 1840 + }, + { + "epoch": 0.97, + "learning_rate": 4.282506527687517e-07, + "logits/chosen": 0.5027902126312256, + "logits/rejected": 0.5105705261230469, + "logps/chosen": -254.4239959716797, + "logps/rejected": -224.8533477783203, + "loss": 0.6752, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.01124303974211216, + "rewards/margins": 0.059338875114917755, + "rewards/rejected": -0.048095833510160446, + "step": 1850 + }, + { + "epoch": 0.97, + "learning_rate": 4.271799171765016e-07, + "logits/chosen": 0.37145090103149414, + "logits/rejected": 0.4021398425102234, + "logps/chosen": -241.52194213867188, + "logps/rejected": -235.2782440185547, + "loss": 0.6694, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.0031918413005769253, + "rewards/margins": 0.042500268667936325, + "rewards/rejected": -0.03930842876434326, + "step": 1860 + }, + { + "epoch": 0.98, + "learning_rate": 4.2610261133505323e-07, + "logits/chosen": 0.36971864104270935, + "logits/rejected": 0.303048312664032, + "logps/chosen": -361.45941162109375, + "logps/rejected": -338.87738037109375, + "loss": 0.6722, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.017358357086777687, + "rewards/margins": 0.05369344353675842, + "rewards/rejected": -0.036335088312625885, + "step": 1870 + }, + { + "epoch": 0.98, + "learning_rate": 4.250187751934479e-07, + "logits/chosen": 0.22648346424102783, + "logits/rejected": 0.2450123280286789, + "logps/chosen": -312.0504150390625, + "logps/rejected": -309.24127197265625, + "loss": 0.6723, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.006947515066713095, + "rewards/margins": 0.025908803567290306, + "rewards/rejected": -0.03285632282495499, + "step": 1880 + }, + { + "epoch": 0.99, + "learning_rate": 4.2392844894288605e-07, + "logits/chosen": 0.36955493688583374, + "logits/rejected": 0.37511900067329407, + "logps/chosen": -281.3575134277344, + "logps/rejected": -293.9774169921875, + "loss": 0.6748, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.008413514122366905, + "rewards/margins": 0.05240880325436592, + "rewards/rejected": -0.060822319239377975, + "step": 1890 + }, + { + "epoch": 0.99, + "learning_rate": 4.2283167301523634e-07, + "logits/chosen": 0.380458801984787, + "logits/rejected": 0.36902934312820435, + "logps/chosen": -254.7010498046875, + "logps/rejected": -220.5850372314453, + "loss": 0.677, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.01337057538330555, + "rewards/margins": 0.040344201028347015, + "rewards/rejected": -0.05371478199958801, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 4.217284880815369e-07, + "logits/chosen": 0.2915937304496765, + "logits/rejected": 0.28769439458847046, + "logps/chosen": -311.2845458984375, + "logps/rejected": -284.9055480957031, + "loss": 0.6727, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.008056707680225372, + "rewards/margins": 0.026231324300169945, + "rewards/rejected": -0.01817461848258972, + "step": 1910 + }, + { + "epoch": 1.0, + "learning_rate": 4.2061893505048694e-07, + "logits/chosen": 0.4433872103691101, + "logits/rejected": 0.5107973217964172, + "logps/chosen": -273.775390625, + "logps/rejected": -284.90509033203125, + "loss": 0.6723, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0016942888032644987, + "rewards/margins": 0.04829670116305351, + "rewards/rejected": -0.04999098926782608, + "step": 1920 + }, + { + "epoch": 1.01, + "learning_rate": 4.1950305506692967e-07, + "logits/chosen": 0.38846421241760254, + "logits/rejected": 0.38444000482559204, + "logps/chosen": -299.87896728515625, + "logps/rejected": -298.85369873046875, + "loss": 0.6756, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.0038868593983352184, + "rewards/margins": 0.0499236099421978, + "rewards/rejected": -0.046036750078201294, + "step": 1930 + }, + { + "epoch": 1.02, + "learning_rate": 4.1838088951032656e-07, + "logits/chosen": 0.25646716356277466, + "logits/rejected": 0.2298089563846588, + "logps/chosen": -345.03997802734375, + "logps/rejected": -319.909912109375, + "loss": 0.6709, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.00014434941112995148, + "rewards/margins": 0.06486411392688751, + "rewards/rejected": -0.06500846892595291, + "step": 1940 + }, + { + "epoch": 1.02, + "learning_rate": 4.172524799932231e-07, + "logits/chosen": 0.4529836177825928, + "logits/rejected": 0.4203832745552063, + "logps/chosen": -221.33285522460938, + "logps/rejected": -218.61892700195312, + "loss": 0.673, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.016625165939331055, + "rewards/margins": 0.043132197111845016, + "rewards/rejected": -0.05975737050175667, + "step": 1950 + }, + { + "epoch": 1.03, + "learning_rate": 4.161178683597054e-07, + "logits/chosen": 0.3584556579589844, + "logits/rejected": 0.36893972754478455, + "logps/chosen": -299.40118408203125, + "logps/rejected": -290.5531921386719, + "loss": 0.6748, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.01853383705019951, + "rewards/margins": 0.02997218444943428, + "rewards/rejected": -0.04850602149963379, + "step": 1960 + }, + { + "epoch": 1.03, + "learning_rate": 4.1497709668384885e-07, + "logits/chosen": 0.26903319358825684, + "logits/rejected": 0.2895038425922394, + "logps/chosen": -355.22833251953125, + "logps/rejected": -334.9042663574219, + "loss": 0.6753, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0025704544968903065, + "rewards/margins": 0.03809639438986778, + "rewards/rejected": -0.03552594035863876, + "step": 1970 + }, + { + "epoch": 1.04, + "learning_rate": 4.1383020726815745e-07, + "logits/chosen": 0.30599287152290344, + "logits/rejected": 0.19721153378486633, + "logps/chosen": -380.2153015136719, + "logps/rejected": -280.277587890625, + "loss": 0.6702, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.004432808607816696, + "rewards/margins": 0.04755949229001999, + "rewards/rejected": -0.043126679956912994, + "step": 1980 + }, + { + "epoch": 1.04, + "learning_rate": 4.126772426419959e-07, + "logits/chosen": 0.40132278203964233, + "logits/rejected": 0.3763146698474884, + "logps/chosen": -268.63140869140625, + "logps/rejected": -236.5342254638672, + "loss": 0.6784, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.01728612184524536, + "rewards/margins": 0.02777310088276863, + "rewards/rejected": -0.045059219002723694, + "step": 1990 + }, + { + "epoch": 1.05, + "learning_rate": 4.1151824556001145e-07, + "logits/chosen": 0.3457508683204651, + "logits/rejected": 0.30099207162857056, + "logps/chosen": -280.82720947265625, + "logps/rejected": -264.7738342285156, + "loss": 0.6737, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.02697814628481865, + "rewards/margins": 0.02771422266960144, + "rewards/rejected": -0.05469236522912979, + "step": 2000 + }, + { + "epoch": 1.05, + "learning_rate": 4.103532590005495e-07, + "logits/chosen": 0.40293654799461365, + "logits/rejected": 0.33411556482315063, + "logps/chosen": -299.47198486328125, + "logps/rejected": -236.7769012451172, + "loss": 0.6698, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 0.02038879506289959, + "rewards/margins": 0.06998451054096222, + "rewards/rejected": -0.049595706164836884, + "step": 2010 + }, + { + "epoch": 1.06, + "learning_rate": 4.091823261640592e-07, + "logits/chosen": 0.39449697732925415, + "logits/rejected": 0.2622838616371155, + "logps/chosen": -361.0088806152344, + "logps/rejected": -254.7809295654297, + "loss": 0.671, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0016580044757574797, + "rewards/margins": 0.05266914889216423, + "rewards/rejected": -0.05432716757059097, + "step": 2020 + }, + { + "epoch": 1.06, + "learning_rate": 4.080054904714917e-07, + "logits/chosen": 0.45272356271743774, + "logits/rejected": 0.3646053969860077, + "logps/chosen": -290.106689453125, + "logps/rejected": -281.1361083984375, + "loss": 0.6713, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.013397174887359142, + "rewards/margins": 0.034534044563770294, + "rewards/rejected": -0.04793121665716171, + "step": 2030 + }, + { + "epoch": 1.07, + "learning_rate": 4.0682279556268993e-07, + "logits/chosen": 0.3366960883140564, + "logits/rejected": 0.29935771226882935, + "logps/chosen": -343.77142333984375, + "logps/rejected": -292.15960693359375, + "loss": 0.6702, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.0014214410912245512, + "rewards/margins": 0.039344437420368195, + "rewards/rejected": -0.037922997027635574, + "step": 2040 + }, + { + "epoch": 1.07, + "learning_rate": 4.056342852947706e-07, + "logits/chosen": 0.4113841950893402, + "logits/rejected": 0.36100929975509644, + "logps/chosen": -304.214599609375, + "logps/rejected": -267.27294921875, + "loss": 0.6753, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.0162814911454916, + "rewards/margins": 0.03878726810216904, + "rewards/rejected": -0.05506875365972519, + "step": 2050 + }, + { + "epoch": 1.08, + "learning_rate": 4.044400037404973e-07, + "logits/chosen": 0.2680433392524719, + "logits/rejected": 0.23881573975086212, + "logps/chosen": -247.9770050048828, + "logps/rejected": -265.25341796875, + "loss": 0.6702, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.01128520630300045, + "rewards/margins": 0.043352216482162476, + "rewards/rejected": -0.054637424647808075, + "step": 2060 + }, + { + "epoch": 1.08, + "learning_rate": 4.032399951866468e-07, + "logits/chosen": 0.3386828303337097, + "logits/rejected": 0.37686488032341003, + "logps/chosen": -260.5417175292969, + "logps/rejected": -324.646484375, + "loss": 0.6783, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.025614675134420395, + "rewards/margins": 0.016095593571662903, + "rewards/rejected": -0.041710264980793, + "step": 2070 + }, + { + "epoch": 1.09, + "learning_rate": 4.0203430413236637e-07, + "logits/chosen": 0.3427388072013855, + "logits/rejected": 0.2969673275947571, + "logps/chosen": -331.38677978515625, + "logps/rejected": -326.67315673828125, + "loss": 0.6724, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.006624752189964056, + "rewards/margins": 0.06880460679531097, + "rewards/rejected": -0.07542935013771057, + "step": 2080 + }, + { + "epoch": 1.09, + "learning_rate": 4.0082297528752407e-07, + "logits/chosen": 0.3527575135231018, + "logits/rejected": 0.3978745639324188, + "logps/chosen": -256.7491760253906, + "logps/rejected": -277.7139587402344, + "loss": 0.6678, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.04201974719762802, + "rewards/margins": 0.015251986682415009, + "rewards/rejected": -0.05727173015475273, + "step": 2090 + }, + { + "epoch": 1.1, + "learning_rate": 3.9960605357105e-07, + "logits/chosen": 0.29884204268455505, + "logits/rejected": 0.21345266699790955, + "logps/chosen": -328.6796875, + "logps/rejected": -261.64208984375, + "loss": 0.6663, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0030146692879498005, + "rewards/margins": 0.06451131403446198, + "rewards/rejected": -0.06149665638804436, + "step": 2100 + }, + { + "epoch": 1.1, + "learning_rate": 3.983835841092716e-07, + "logits/chosen": 0.23722746968269348, + "logits/rejected": 0.23029477894306183, + "logps/chosen": -390.21002197265625, + "logps/rejected": -309.8589782714844, + "loss": 0.6654, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.006493761204183102, + "rewards/margins": 0.09093605726957321, + "rewards/rejected": -0.08444229513406754, + "step": 2110 + }, + { + "epoch": 1.11, + "learning_rate": 3.971556122342398e-07, + "logits/chosen": 0.4192166328430176, + "logits/rejected": 0.4076710343360901, + "logps/chosen": -287.0326232910156, + "logps/rejected": -246.2929229736328, + "loss": 0.67, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.022333037108182907, + "rewards/margins": 0.04057054594159126, + "rewards/rejected": -0.06290359050035477, + "step": 2120 + }, + { + "epoch": 1.11, + "learning_rate": 3.9592218348204766e-07, + "logits/chosen": 0.3088974356651306, + "logits/rejected": 0.2831978499889374, + "logps/chosen": -273.89495849609375, + "logps/rejected": -264.4192810058594, + "loss": 0.6762, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.020427577197551727, + "rewards/margins": 0.03233342617750168, + "rewards/rejected": -0.05276099964976311, + "step": 2130 + }, + { + "epoch": 1.12, + "learning_rate": 3.946833435911423e-07, + "logits/chosen": 0.3843507766723633, + "logits/rejected": 0.39491331577301025, + "logps/chosen": -282.49163818359375, + "logps/rejected": -212.6112060546875, + "loss": 0.6677, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.013249019160866737, + "rewards/margins": 0.09190802276134491, + "rewards/rejected": -0.07865899056196213, + "step": 2140 + }, + { + "epoch": 1.13, + "learning_rate": 3.9343913850062856e-07, + "logits/chosen": 0.2880811095237732, + "logits/rejected": 0.2723557949066162, + "logps/chosen": -298.74163818359375, + "logps/rejected": -255.66567993164062, + "loss": 0.6739, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.027912423014640808, + "rewards/margins": 0.040033094584941864, + "rewards/rejected": -0.06794553250074387, + "step": 2150 + }, + { + "epoch": 1.13, + "learning_rate": 3.921896143485657e-07, + "logits/chosen": 0.2881740927696228, + "logits/rejected": 0.3070821762084961, + "logps/chosen": -277.80462646484375, + "logps/rejected": -267.8509521484375, + "loss": 0.6694, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.03029860183596611, + "rewards/margins": 0.03809525817632675, + "rewards/rejected": -0.06839386373758316, + "step": 2160 + }, + { + "epoch": 1.14, + "learning_rate": 3.9093481747025615e-07, + "logits/chosen": 0.36197030544281006, + "logits/rejected": 0.33939141035079956, + "logps/chosen": -365.284423828125, + "logps/rejected": -327.8709716796875, + "loss": 0.6669, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.02871832624077797, + "rewards/margins": 0.041896142065525055, + "rewards/rejected": -0.07061446458101273, + "step": 2170 + }, + { + "epoch": 1.14, + "learning_rate": 3.896747943965275e-07, + "logits/chosen": 0.28229203820228577, + "logits/rejected": 0.3258668780326843, + "logps/chosen": -254.9258270263672, + "logps/rejected": -258.52142333984375, + "loss": 0.6717, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.023102175444364548, + "rewards/margins": 0.06240806728601456, + "rewards/rejected": -0.0855102464556694, + "step": 2180 + }, + { + "epoch": 1.15, + "learning_rate": 3.8840959185200717e-07, + "logits/chosen": 0.2814302146434784, + "logits/rejected": 0.30456703901290894, + "logps/chosen": -275.612060546875, + "logps/rejected": -282.63909912109375, + "loss": 0.6693, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01812458410859108, + "rewards/margins": 0.020614150911569595, + "rewards/rejected": -0.038738735020160675, + "step": 2190 + }, + { + "epoch": 1.15, + "learning_rate": 3.871392567533893e-07, + "logits/chosen": 0.37268248200416565, + "logits/rejected": 0.3728785514831543, + "logps/chosen": -278.0909423828125, + "logps/rejected": -248.9449920654297, + "loss": 0.6741, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.03167051449418068, + "rewards/margins": 0.046841494739055634, + "rewards/rejected": -0.07851200550794601, + "step": 2200 + }, + { + "epoch": 1.16, + "learning_rate": 3.858638362076953e-07, + "logits/chosen": 0.3451462984085083, + "logits/rejected": 0.3002368211746216, + "logps/chosen": -288.4288330078125, + "logps/rejected": -265.7797546386719, + "loss": 0.6735, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.03316992148756981, + "rewards/margins": 0.03865870088338852, + "rewards/rejected": -0.07182861864566803, + "step": 2210 + }, + { + "epoch": 1.16, + "learning_rate": 3.845833775105272e-07, + "logits/chosen": 0.36414092779159546, + "logits/rejected": 0.356741726398468, + "logps/chosen": -326.46490478515625, + "logps/rejected": -294.9678039550781, + "loss": 0.6683, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.020617762580513954, + "rewards/margins": 0.0373426154255867, + "rewards/rejected": -0.05796036869287491, + "step": 2220 + }, + { + "epoch": 1.17, + "learning_rate": 3.832979281443133e-07, + "logits/chosen": 0.31907418370246887, + "logits/rejected": 0.29716235399246216, + "logps/chosen": -337.14874267578125, + "logps/rejected": -303.4570007324219, + "loss": 0.6727, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.014699602499604225, + "rewards/margins": 0.035601574927568436, + "rewards/rejected": -0.05030117556452751, + "step": 2230 + }, + { + "epoch": 1.17, + "learning_rate": 3.8200753577654765e-07, + "logits/chosen": 0.33600661158561707, + "logits/rejected": 0.3653258681297302, + "logps/chosen": -278.1409912109375, + "logps/rejected": -219.810791015625, + "loss": 0.6669, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.049881428480148315, + "rewards/margins": 0.025163423269987106, + "rewards/rejected": -0.07504484802484512, + "step": 2240 + }, + { + "epoch": 1.18, + "learning_rate": 3.8071224825802273e-07, + "logits/chosen": 0.30984312295913696, + "logits/rejected": 0.32006576657295227, + "logps/chosen": -266.3006591796875, + "logps/rejected": -258.7674560546875, + "loss": 0.6643, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.022308386862277985, + "rewards/margins": 0.03970428183674812, + "rewards/rejected": -0.062012672424316406, + "step": 2250 + }, + { + "epoch": 1.18, + "learning_rate": 3.7941211362105453e-07, + "logits/chosen": 0.3602008819580078, + "logits/rejected": 0.36997145414352417, + "logps/chosen": -377.5152893066406, + "logps/rejected": -342.37744140625, + "loss": 0.6676, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.013790899887681007, + "rewards/margins": 0.059584714472293854, + "rewards/rejected": -0.0457938127219677, + "step": 2260 + }, + { + "epoch": 1.19, + "learning_rate": 3.781071800777017e-07, + "logits/chosen": 0.42406994104385376, + "logits/rejected": 0.406341552734375, + "logps/chosen": -289.2626037597656, + "logps/rejected": -255.7004852294922, + "loss": 0.6661, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.04291009157896042, + "rewards/margins": 0.031034070998430252, + "rewards/rejected": -0.07394416630268097, + "step": 2270 + }, + { + "epoch": 1.19, + "learning_rate": 3.767974960179776e-07, + "logits/chosen": 0.38467639684677124, + "logits/rejected": 0.36330491304397583, + "logps/chosen": -376.39801025390625, + "logps/rejected": -299.58599853515625, + "loss": 0.6696, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.02126261219382286, + "rewards/margins": 0.0888260155916214, + "rewards/rejected": -0.06756339967250824, + "step": 2280 + }, + { + "epoch": 1.2, + "learning_rate": 3.7548311000805605e-07, + "logits/chosen": 0.383809894323349, + "logits/rejected": 0.3646061420440674, + "logps/chosen": -299.8836975097656, + "logps/rejected": -254.42153930664062, + "loss": 0.6699, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.025484036654233932, + "rewards/margins": 0.05899130553007126, + "rewards/rejected": -0.08447533845901489, + "step": 2290 + }, + { + "epoch": 1.2, + "learning_rate": 3.7416407078847015e-07, + "logits/chosen": 0.39214324951171875, + "logits/rejected": 0.39261943101882935, + "logps/chosen": -319.5709533691406, + "logps/rejected": -287.7979736328125, + "loss": 0.6672, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.016738515347242355, + "rewards/margins": 0.06187937781214714, + "rewards/rejected": -0.0786178931593895, + "step": 2300 + }, + { + "epoch": 1.21, + "learning_rate": 3.7284042727230506e-07, + "logits/chosen": 0.3887889087200165, + "logits/rejected": 0.3705871105194092, + "logps/chosen": -288.55438232421875, + "logps/rejected": -297.3534240722656, + "loss": 0.6688, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.011513590812683105, + "rewards/margins": 0.06126584857702255, + "rewards/rejected": -0.07277944684028625, + "step": 2310 + }, + { + "epoch": 1.21, + "learning_rate": 3.7151222854338413e-07, + "logits/chosen": 0.36249834299087524, + "logits/rejected": 0.44951605796813965, + "logps/chosen": -264.47503662109375, + "logps/rejected": -288.58367919921875, + "loss": 0.6675, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.019437994807958603, + "rewards/margins": 0.05599921941757202, + "rewards/rejected": -0.07543721050024033, + "step": 2320 + }, + { + "epoch": 1.22, + "learning_rate": 3.701795238544488e-07, + "logits/chosen": 0.3534383773803711, + "logits/rejected": 0.35369449853897095, + "logps/chosen": -345.45233154296875, + "logps/rejected": -266.8211364746094, + "loss": 0.6697, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.011401348747313023, + "rewards/margins": 0.04326106235384941, + "rewards/rejected": -0.05466241389513016, + "step": 2330 + }, + { + "epoch": 1.22, + "learning_rate": 3.688423626253318e-07, + "logits/chosen": 0.4797401428222656, + "logits/rejected": 0.3779059648513794, + "logps/chosen": -360.5686950683594, + "logps/rejected": -247.0854034423828, + "loss": 0.6667, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.023860588669776917, + "rewards/margins": 0.0794132724404335, + "rewards/rejected": -0.10327385365962982, + "step": 2340 + }, + { + "epoch": 1.23, + "learning_rate": 3.675007944411253e-07, + "logits/chosen": 0.33330869674682617, + "logits/rejected": 0.19414468109607697, + "logps/chosen": -334.6083068847656, + "logps/rejected": -248.15170288085938, + "loss": 0.6592, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.01777069643139839, + "rewards/margins": 0.08200386166572571, + "rewards/rejected": -0.0997745618224144, + "step": 2350 + }, + { + "epoch": 1.23, + "learning_rate": 3.6615486905034167e-07, + "logits/chosen": 0.29605159163475037, + "logits/rejected": 0.2758367359638214, + "logps/chosen": -312.92071533203125, + "logps/rejected": -266.828369140625, + "loss": 0.6672, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.034411195665597916, + "rewards/margins": 0.03460158035159111, + "rewards/rejected": -0.06901277601718903, + "step": 2360 + }, + { + "epoch": 1.24, + "learning_rate": 3.6480463636306846e-07, + "logits/chosen": 0.31986507773399353, + "logits/rejected": 0.27656489610671997, + "logps/chosen": -377.6355895996094, + "logps/rejected": -301.74896240234375, + "loss": 0.6634, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.01851150207221508, + "rewards/margins": 0.0832141637802124, + "rewards/rejected": -0.10172567516565323, + "step": 2370 + }, + { + "epoch": 1.25, + "learning_rate": 3.634501464491183e-07, + "logits/chosen": 0.37213101983070374, + "logits/rejected": 0.33602914214134216, + "logps/chosen": -316.6075439453125, + "logps/rejected": -262.6651611328125, + "loss": 0.6642, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.01945626363158226, + "rewards/margins": 0.058711398392915726, + "rewards/rejected": -0.07816765457391739, + "step": 2380 + }, + { + "epoch": 1.25, + "learning_rate": 3.6209144953617175e-07, + "logits/chosen": 0.3521496653556824, + "logits/rejected": 0.3292858898639679, + "logps/chosen": -352.98388671875, + "logps/rejected": -309.53057861328125, + "loss": 0.6659, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.02008138597011566, + "rewards/margins": 0.06914113461971283, + "rewards/rejected": -0.04905973747372627, + "step": 2390 + }, + { + "epoch": 1.26, + "learning_rate": 3.607285960079146e-07, + "logits/chosen": 0.3754183351993561, + "logits/rejected": 0.30044883489608765, + "logps/chosen": -285.4665832519531, + "logps/rejected": -233.817138671875, + "loss": 0.6676, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.03596179559826851, + "rewards/margins": 0.05098012834787369, + "rewards/rejected": -0.08694193512201309, + "step": 2400 + }, + { + "epoch": 1.26, + "learning_rate": 3.593616364021701e-07, + "logits/chosen": 0.33115464448928833, + "logits/rejected": 0.3573494255542755, + "logps/chosen": -217.48855590820312, + "logps/rejected": -243.66287231445312, + "loss": 0.6665, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.042571354657411575, + "rewards/margins": 0.040859851986169815, + "rewards/rejected": -0.0834311991930008, + "step": 2410 + }, + { + "epoch": 1.27, + "learning_rate": 3.5799062140902413e-07, + "logits/chosen": 0.29329270124435425, + "logits/rejected": 0.3528839945793152, + "logps/chosen": -254.28964233398438, + "logps/rejected": -271.6838684082031, + "loss": 0.6671, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.011812356300652027, + "rewards/margins": 0.07317445427179337, + "rewards/rejected": -0.08498681336641312, + "step": 2420 + }, + { + "epoch": 1.27, + "learning_rate": 3.566156018689462e-07, + "logits/chosen": 0.3424796760082245, + "logits/rejected": 0.3287104666233063, + "logps/chosen": -383.9771423339844, + "logps/rejected": -304.5723876953125, + "loss": 0.6749, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.019487058743834496, + "rewards/margins": 0.059456080198287964, + "rewards/rejected": -0.07894313335418701, + "step": 2430 + }, + { + "epoch": 1.28, + "learning_rate": 3.552366287709038e-07, + "logits/chosen": 0.35111716389656067, + "logits/rejected": 0.34353193640708923, + "logps/chosen": -374.93115234375, + "logps/rejected": -317.59686279296875, + "loss": 0.6677, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.004710751585662365, + "rewards/margins": 0.08451598882675171, + "rewards/rejected": -0.08922673761844635, + "step": 2440 + }, + { + "epoch": 1.28, + "learning_rate": 3.5385375325047163e-07, + "logits/chosen": 0.38372933864593506, + "logits/rejected": 0.38185805082321167, + "logps/chosen": -289.7801818847656, + "logps/rejected": -228.0769805908203, + "loss": 0.6671, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.06441032141447067, + "rewards/margins": 0.028500813990831375, + "rewards/rejected": -0.09291113913059235, + "step": 2450 + }, + { + "epoch": 1.29, + "learning_rate": 3.524670265879353e-07, + "logits/chosen": 0.2816036343574524, + "logits/rejected": 0.28083696961402893, + "logps/chosen": -371.5879821777344, + "logps/rejected": -354.3928527832031, + "loss": 0.6673, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.014490177854895592, + "rewards/margins": 0.08113773912191391, + "rewards/rejected": -0.09562792629003525, + "step": 2460 + }, + { + "epoch": 1.29, + "learning_rate": 3.510765002063901e-07, + "logits/chosen": 0.3037402629852295, + "logits/rejected": 0.32839471101760864, + "logps/chosen": -307.32916259765625, + "logps/rejected": -284.83636474609375, + "loss": 0.6696, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0401952862739563, + "rewards/margins": 0.05480067804455757, + "rewards/rejected": -0.09499596059322357, + "step": 2470 + }, + { + "epoch": 1.3, + "learning_rate": 3.4968222566983367e-07, + "logits/chosen": 0.28994929790496826, + "logits/rejected": 0.3284696936607361, + "logps/chosen": -280.9746398925781, + "logps/rejected": -247.8575439453125, + "loss": 0.668, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.08172313123941422, + "rewards/margins": 0.007884040474891663, + "rewards/rejected": -0.08960716426372528, + "step": 2480 + }, + { + "epoch": 1.3, + "learning_rate": 3.482842546812543e-07, + "logits/chosen": 0.35081690549850464, + "logits/rejected": 0.324882447719574, + "logps/chosen": -347.78057861328125, + "logps/rejected": -286.3685607910156, + "loss": 0.6664, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.04719354957342148, + "rewards/margins": 0.06654486060142517, + "rewards/rejected": -0.11373841762542725, + "step": 2490 + }, + { + "epoch": 1.31, + "learning_rate": 3.4688263908071307e-07, + "logits/chosen": 0.2330433577299118, + "logits/rejected": 0.26575979590415955, + "logps/chosen": -333.82049560546875, + "logps/rejected": -299.0555725097656, + "loss": 0.666, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.03410058468580246, + "rewards/margins": 0.054598551243543625, + "rewards/rejected": -0.08869913965463638, + "step": 2500 + }, + { + "epoch": 1.31, + "learning_rate": 3.454774308434222e-07, + "logits/chosen": 0.39580804109573364, + "logits/rejected": 0.4053748548030853, + "logps/chosen": -269.8752746582031, + "logps/rejected": -264.6448059082031, + "loss": 0.6671, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.029624611139297485, + "rewards/margins": 0.06283847242593765, + "rewards/rejected": -0.09246308356523514, + "step": 2510 + }, + { + "epoch": 1.32, + "learning_rate": 3.4406868207781725e-07, + "logits/chosen": 0.30161893367767334, + "logits/rejected": 0.325286328792572, + "logps/chosen": -264.8555908203125, + "logps/rejected": -299.9549255371094, + "loss": 0.6686, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.06190790608525276, + "rewards/margins": 0.022210311144590378, + "rewards/rejected": -0.08411821722984314, + "step": 2520 + }, + { + "epoch": 1.32, + "learning_rate": 3.426564450236249e-07, + "logits/chosen": 0.3033444881439209, + "logits/rejected": 0.338754266500473, + "logps/chosen": -296.7602233886719, + "logps/rejected": -301.8728942871094, + "loss": 0.6655, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.04776741936802864, + "rewards/margins": 0.041456062346696854, + "rewards/rejected": -0.08922348916530609, + "step": 2530 + }, + { + "epoch": 1.33, + "learning_rate": 3.4124077204992576e-07, + "logits/chosen": 0.32950612902641296, + "logits/rejected": 0.3448053002357483, + "logps/chosen": -285.41912841796875, + "logps/rejected": -230.43820190429688, + "loss": 0.6621, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.006403304636478424, + "rewards/margins": 0.08996753394603729, + "rewards/rejected": -0.08356423676013947, + "step": 2540 + }, + { + "epoch": 1.33, + "learning_rate": 3.398217156532125e-07, + "logits/chosen": 0.3072153627872467, + "logits/rejected": 0.3017449676990509, + "logps/chosen": -314.2388916015625, + "logps/rejected": -291.82501220703125, + "loss": 0.6642, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 0.009415028616786003, + "rewards/margins": 0.09692169725894928, + "rewards/rejected": -0.08750666677951813, + "step": 2550 + }, + { + "epoch": 1.34, + "learning_rate": 3.383993284554431e-07, + "logits/chosen": 0.2225678414106369, + "logits/rejected": 0.22605307400226593, + "logps/chosen": -339.57342529296875, + "logps/rejected": -302.07830810546875, + "loss": 0.6687, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.015412566252052784, + "rewards/margins": 0.05832207202911377, + "rewards/rejected": -0.07373463362455368, + "step": 2560 + }, + { + "epoch": 1.34, + "learning_rate": 3.3697366320208955e-07, + "logits/chosen": 0.2920198142528534, + "logits/rejected": 0.3342500627040863, + "logps/chosen": -296.05169677734375, + "logps/rejected": -315.5035705566406, + "loss": 0.6687, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.03576388955116272, + "rewards/margins": 0.07963220775127411, + "rewards/rejected": -0.11539609730243683, + "step": 2570 + }, + { + "epoch": 1.35, + "learning_rate": 3.355447727601816e-07, + "logits/chosen": 0.3131474554538727, + "logits/rejected": 0.35242384672164917, + "logps/chosen": -318.4373474121094, + "logps/rejected": -317.1743469238281, + "loss": 0.6656, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0381944365799427, + "rewards/margins": 0.0370551198720932, + "rewards/rejected": -0.0752495601773262, + "step": 2580 + }, + { + "epoch": 1.36, + "learning_rate": 3.3411271011634697e-07, + "logits/chosen": 0.40101736783981323, + "logits/rejected": 0.34883618354797363, + "logps/chosen": -295.4554443359375, + "logps/rejected": -307.9974365234375, + "loss": 0.6742, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.020811382681131363, + "rewards/margins": 0.06216179206967354, + "rewards/rejected": -0.0829731673002243, + "step": 2590 + }, + { + "epoch": 1.36, + "learning_rate": 3.3267752837484587e-07, + "logits/chosen": 0.3139980435371399, + "logits/rejected": 0.3570219576358795, + "logps/chosen": -332.45684814453125, + "logps/rejected": -294.8971862792969, + "loss": 0.6644, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.03061739169061184, + "rewards/margins": 0.049742937088012695, + "rewards/rejected": -0.08036032319068909, + "step": 2600 + }, + { + "epoch": 1.37, + "learning_rate": 3.31239280755602e-07, + "logits/chosen": 0.37406110763549805, + "logits/rejected": 0.3708162307739258, + "logps/chosen": -272.1312255859375, + "logps/rejected": -225.3962860107422, + "loss": 0.6657, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.04108922928571701, + "rewards/margins": 0.0460977777838707, + "rewards/rejected": -0.0871870145201683, + "step": 2610 + }, + { + "epoch": 1.37, + "learning_rate": 3.2979802059222936e-07, + "logits/chosen": 0.30520889163017273, + "logits/rejected": 0.3104460835456848, + "logps/chosen": -288.23614501953125, + "logps/rejected": -305.2883605957031, + "loss": 0.6677, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.060263652354478836, + "rewards/margins": 0.018435927107930183, + "rewards/rejected": -0.07869957387447357, + "step": 2620 + }, + { + "epoch": 1.38, + "learning_rate": 3.283538013300537e-07, + "logits/chosen": 0.2660229504108429, + "logits/rejected": 0.25236591696739197, + "logps/chosen": -316.4666442871094, + "logps/rejected": -251.6602325439453, + "loss": 0.6645, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.05971134454011917, + "rewards/margins": 0.06788526475429535, + "rewards/rejected": -0.12759660184383392, + "step": 2630 + }, + { + "epoch": 1.38, + "learning_rate": 3.269066765241314e-07, + "logits/chosen": 0.353706419467926, + "logits/rejected": 0.2846869230270386, + "logps/chosen": -340.47100830078125, + "logps/rejected": -274.4219970703125, + "loss": 0.6665, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.04179629683494568, + "rewards/margins": 0.08098026365041733, + "rewards/rejected": -0.12277655303478241, + "step": 2640 + }, + { + "epoch": 1.39, + "learning_rate": 3.254566998372634e-07, + "logits/chosen": 0.3207184076309204, + "logits/rejected": 0.3243858218193054, + "logps/chosen": -291.7104797363281, + "logps/rejected": -275.7958068847656, + "loss": 0.6687, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.023017197847366333, + "rewards/margins": 0.06596306711435318, + "rewards/rejected": -0.08898027241230011, + "step": 2650 + }, + { + "epoch": 1.39, + "learning_rate": 3.2400392503800477e-07, + "logits/chosen": 0.2825477719306946, + "logits/rejected": 0.30369895696640015, + "logps/chosen": -270.70367431640625, + "logps/rejected": -311.6441955566406, + "loss": 0.6665, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.054010480642318726, + "rewards/margins": 0.06956785917282104, + "rewards/rejected": -0.12357833236455917, + "step": 2660 + }, + { + "epoch": 1.4, + "learning_rate": 3.225484059986715e-07, + "logits/chosen": 0.37458476424217224, + "logits/rejected": 0.3407798409461975, + "logps/chosen": -310.98712158203125, + "logps/rejected": -299.9549865722656, + "loss": 0.6777, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.05233936384320259, + "rewards/margins": 0.025646230205893517, + "rewards/rejected": -0.07798559963703156, + "step": 2670 + }, + { + "epoch": 1.4, + "learning_rate": 3.2109019669334215e-07, + "logits/chosen": 0.32417067885398865, + "logits/rejected": 0.317020982503891, + "logps/chosen": -295.0063171386719, + "logps/rejected": -251.1151123046875, + "loss": 0.6652, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.03081471286714077, + "rewards/margins": 0.07520559430122375, + "rewards/rejected": -0.10602030903100967, + "step": 2680 + }, + { + "epoch": 1.41, + "learning_rate": 3.19629351195857e-07, + "logits/chosen": 0.3401109576225281, + "logits/rejected": 0.33117538690567017, + "logps/chosen": -269.0340270996094, + "logps/rejected": -289.77703857421875, + "loss": 0.663, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.07183998078107834, + "rewards/margins": 0.021650653332471848, + "rewards/rejected": -0.09349063783884048, + "step": 2690 + }, + { + "epoch": 1.41, + "learning_rate": 3.1816592367781236e-07, + "logits/chosen": 0.3018365502357483, + "logits/rejected": 0.2779509127140045, + "logps/chosen": -290.2937927246094, + "logps/rejected": -261.4476623535156, + "loss": 0.6619, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.046037644147872925, + "rewards/margins": 0.06918086111545563, + "rewards/rejected": -0.11521850526332855, + "step": 2700 + }, + { + "epoch": 1.42, + "learning_rate": 3.166999684065521e-07, + "logits/chosen": 0.2595480680465698, + "logits/rejected": 0.25535058975219727, + "logps/chosen": -278.13775634765625, + "logps/rejected": -259.5060729980469, + "loss": 0.6631, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.05215588957071304, + "rewards/margins": 0.06091712787747383, + "rewards/rejected": -0.11307301372289658, + "step": 2710 + }, + { + "epoch": 1.42, + "learning_rate": 3.1523153974315497e-07, + "logits/chosen": 0.3285236954689026, + "logits/rejected": 0.25150084495544434, + "logps/chosen": -398.76983642578125, + "logps/rejected": -276.6143493652344, + "loss": 0.658, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.037296637892723083, + "rewards/margins": 0.07015573978424072, + "rewards/rejected": -0.10745237022638321, + "step": 2720 + }, + { + "epoch": 1.43, + "learning_rate": 3.137606921404191e-07, + "logits/chosen": 0.30357763171195984, + "logits/rejected": 0.2153654545545578, + "logps/chosen": -385.17486572265625, + "logps/rejected": -260.77166748046875, + "loss": 0.6598, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.009311881847679615, + "rewards/margins": 0.13724280893802643, + "rewards/rejected": -0.1279309242963791, + "step": 2730 + }, + { + "epoch": 1.43, + "learning_rate": 3.1228748014084243e-07, + "logits/chosen": 0.4386512339115143, + "logits/rejected": 0.42425936460494995, + "logps/chosen": -256.3681640625, + "logps/rejected": -229.419677734375, + "loss": 0.6658, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.048789579421281815, + "rewards/margins": 0.07216767966747284, + "rewards/rejected": -0.12095727026462555, + "step": 2740 + }, + { + "epoch": 1.44, + "learning_rate": 3.108119583746005e-07, + "logits/chosen": 0.28032955527305603, + "logits/rejected": 0.3129872679710388, + "logps/chosen": -233.5014190673828, + "logps/rejected": -281.8854064941406, + "loss": 0.6639, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.04254484921693802, + "rewards/margins": 0.07571487128734589, + "rewards/rejected": -0.11825971305370331, + "step": 2750 + }, + { + "epoch": 1.44, + "learning_rate": 3.093341815575202e-07, + "logits/chosen": 0.37104588747024536, + "logits/rejected": 0.3991813063621521, + "logps/chosen": -306.2823791503906, + "logps/rejected": -334.3895568847656, + "loss": 0.6625, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.03866010531783104, + "rewards/margins": 0.0574616901576519, + "rewards/rejected": -0.09612180292606354, + "step": 2760 + }, + { + "epoch": 1.45, + "learning_rate": 3.078542044890513e-07, + "logits/chosen": 0.369444876909256, + "logits/rejected": 0.41406869888305664, + "logps/chosen": -263.76654052734375, + "logps/rejected": -248.6498260498047, + "loss": 0.6668, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.066460520029068, + "rewards/margins": 0.08304537087678909, + "rewards/rejected": -0.1495058834552765, + "step": 2770 + }, + { + "epoch": 1.45, + "learning_rate": 3.0637208205023386e-07, + "logits/chosen": 0.3381851315498352, + "logits/rejected": 0.36313921213150024, + "logps/chosen": -293.7001953125, + "logps/rejected": -306.66766357421875, + "loss": 0.6625, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.04342319816350937, + "rewards/margins": 0.06256397068500519, + "rewards/rejected": -0.10598716884851456, + "step": 2780 + }, + { + "epoch": 1.46, + "learning_rate": 3.0488786920166343e-07, + "logits/chosen": 0.33373400568962097, + "logits/rejected": 0.3136371076107025, + "logps/chosen": -313.1701354980469, + "logps/rejected": -301.2196960449219, + "loss": 0.6533, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.04934342950582504, + "rewards/margins": 0.09779079258441925, + "rewards/rejected": -0.1471342295408249, + "step": 2790 + }, + { + "epoch": 1.47, + "learning_rate": 3.034016209814529e-07, + "logits/chosen": 0.30923396348953247, + "logits/rejected": 0.29981285333633423, + "logps/chosen": -287.7221374511719, + "logps/rejected": -223.39950561523438, + "loss": 0.6682, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.04875212907791138, + "rewards/margins": 0.08592663705348969, + "rewards/rejected": -0.13467876613140106, + "step": 2800 + }, + { + "epoch": 1.47, + "learning_rate": 3.0191339250319147e-07, + "logits/chosen": 0.3353407382965088, + "logits/rejected": 0.37832242250442505, + "logps/chosen": -333.6568603515625, + "logps/rejected": -342.98223876953125, + "loss": 0.6533, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07310348749160767, + "rewards/margins": 0.07849867641925812, + "rewards/rejected": -0.1516021341085434, + "step": 2810 + }, + { + "epoch": 1.48, + "learning_rate": 3.004232389539011e-07, + "logits/chosen": 0.2526446282863617, + "logits/rejected": 0.2754290699958801, + "logps/chosen": -335.6014709472656, + "logps/rejected": -305.3747253417969, + "loss": 0.6684, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.09115692973136902, + "rewards/margins": 0.0300412829965353, + "rewards/rejected": -0.12119821459054947, + "step": 2820 + }, + { + "epoch": 1.48, + "learning_rate": 2.989312155919898e-07, + "logits/chosen": 0.27508842945098877, + "logits/rejected": 0.22659547626972198, + "logps/chosen": -311.8320007324219, + "logps/rejected": -304.56805419921875, + "loss": 0.6691, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.032169900834560394, + "rewards/margins": 0.10568948835134506, + "rewards/rejected": -0.13785937428474426, + "step": 2830 + }, + { + "epoch": 1.49, + "learning_rate": 2.9743737774520266e-07, + "logits/chosen": 0.33120667934417725, + "logits/rejected": 0.30626198649406433, + "logps/chosen": -314.50384521484375, + "logps/rejected": -244.17703247070312, + "loss": 0.6659, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.059983305633068085, + "rewards/margins": 0.0858563557267189, + "rewards/rejected": -0.145839661359787, + "step": 2840 + }, + { + "epoch": 1.49, + "learning_rate": 2.959417808085702e-07, + "logits/chosen": 0.25924453139305115, + "logits/rejected": 0.25570935010910034, + "logps/chosen": -332.52886962890625, + "logps/rejected": -291.54901123046875, + "loss": 0.6669, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.04318776726722717, + "rewards/margins": 0.07569596916437149, + "rewards/rejected": -0.11888374388217926, + "step": 2850 + }, + { + "epoch": 1.5, + "learning_rate": 2.944444802423542e-07, + "logits/chosen": 0.292915940284729, + "logits/rejected": 0.2799091339111328, + "logps/chosen": -324.13507080078125, + "logps/rejected": -310.09613037109375, + "loss": 0.6586, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.05468999221920967, + "rewards/margins": 0.0883164256811142, + "rewards/rejected": -0.14300641417503357, + "step": 2860 + }, + { + "epoch": 1.5, + "learning_rate": 2.929455315699908e-07, + "logits/chosen": 0.34056323766708374, + "logits/rejected": 0.31849169731140137, + "logps/chosen": -360.6206359863281, + "logps/rejected": -268.1399841308594, + "loss": 0.6643, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.04418856278061867, + "rewards/margins": 0.1260400414466858, + "rewards/rejected": -0.17022861540317535, + "step": 2870 + }, + { + "epoch": 1.51, + "learning_rate": 2.9144499037603204e-07, + "logits/chosen": 0.40575551986694336, + "logits/rejected": 0.42678695917129517, + "logps/chosen": -291.07928466796875, + "logps/rejected": -285.8336181640625, + "loss": 0.659, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.03479582443833351, + "rewards/margins": 0.0920044481754303, + "rewards/rejected": -0.1268002688884735, + "step": 2880 + }, + { + "epoch": 1.51, + "learning_rate": 2.899429123040843e-07, + "logits/chosen": 0.32538041472435, + "logits/rejected": 0.30334755778312683, + "logps/chosen": -294.2937927246094, + "logps/rejected": -285.8299865722656, + "loss": 0.6629, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.07144945859909058, + "rewards/margins": 0.07757963240146637, + "rewards/rejected": -0.14902909100055695, + "step": 2890 + }, + { + "epoch": 1.52, + "learning_rate": 2.884393530547452e-07, + "logits/chosen": 0.378600537776947, + "logits/rejected": 0.2921772301197052, + "logps/chosen": -343.437255859375, + "logps/rejected": -311.8553466796875, + "loss": 0.6584, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.04117577522993088, + "rewards/margins": 0.07172152400016785, + "rewards/rejected": -0.11289729923009872, + "step": 2900 + }, + { + "epoch": 1.52, + "learning_rate": 2.869343683835376e-07, + "logits/chosen": 0.30608147382736206, + "logits/rejected": 0.2674848735332489, + "logps/chosen": -358.9638671875, + "logps/rejected": -281.90582275390625, + "loss": 0.6538, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.023800866678357124, + "rewards/margins": 0.11379053443670273, + "rewards/rejected": -0.1375913918018341, + "step": 2910 + }, + { + "epoch": 1.53, + "learning_rate": 2.8542801409884253e-07, + "logits/chosen": 0.3522571921348572, + "logits/rejected": 0.3938680589199066, + "logps/chosen": -267.18597412109375, + "logps/rejected": -226.17062377929688, + "loss": 0.6623, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.05746225267648697, + "rewards/margins": 0.06825403869152069, + "rewards/rejected": -0.12571629881858826, + "step": 2920 + }, + { + "epoch": 1.53, + "learning_rate": 2.839203460598297e-07, + "logits/chosen": 0.25303295254707336, + "logits/rejected": 0.308340847492218, + "logps/chosen": -384.5491027832031, + "logps/rejected": -375.91925048828125, + "loss": 0.6652, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.049062587320804596, + "rewards/margins": 0.08676549792289734, + "rewards/rejected": -0.13582809269428253, + "step": 2930 + }, + { + "epoch": 1.54, + "learning_rate": 2.8241142017438557e-07, + "logits/chosen": 0.36798354983329773, + "logits/rejected": 0.3655903935432434, + "logps/chosen": -257.49383544921875, + "logps/rejected": -294.4346923828125, + "loss": 0.6627, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.06181669980287552, + "rewards/margins": 0.08136054128408432, + "rewards/rejected": -0.14317724108695984, + "step": 2940 + }, + { + "epoch": 1.54, + "learning_rate": 2.8090129239704083e-07, + "logits/chosen": 0.2773851156234741, + "logits/rejected": 0.2811713218688965, + "logps/chosen": -329.9738464355469, + "logps/rejected": -298.5468444824219, + "loss": 0.6573, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.06664275377988815, + "rewards/margins": 0.059970151633024216, + "rewards/rejected": -0.12661293148994446, + "step": 2950 + }, + { + "epoch": 1.55, + "learning_rate": 2.7939001872689496e-07, + "logits/chosen": 0.32175400853157043, + "logits/rejected": 0.3031854033470154, + "logps/chosen": -328.90771484375, + "logps/rejected": -307.1551208496094, + "loss": 0.6579, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.03462161868810654, + "rewards/margins": 0.10548686981201172, + "rewards/rejected": -0.14010848104953766, + "step": 2960 + }, + { + "epoch": 1.55, + "learning_rate": 2.778776552055398e-07, + "logits/chosen": 0.4104437828063965, + "logits/rejected": 0.3513795733451843, + "logps/chosen": -294.09381103515625, + "logps/rejected": -247.57510375976562, + "loss": 0.6591, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.04517308250069618, + "rewards/margins": 0.10987844318151474, + "rewards/rejected": -0.15505151450634003, + "step": 2970 + }, + { + "epoch": 1.56, + "learning_rate": 2.763642579149817e-07, + "logits/chosen": 0.27305012941360474, + "logits/rejected": 0.27377820014953613, + "logps/chosen": -300.01483154296875, + "logps/rejected": -337.39788818359375, + "loss": 0.6597, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.047467950731515884, + "rewards/margins": 0.05927792191505432, + "rewards/rejected": -0.1067458763718605, + "step": 2980 + }, + { + "epoch": 1.56, + "learning_rate": 2.748498829755615e-07, + "logits/chosen": 0.32688483595848083, + "logits/rejected": 0.3695555627346039, + "logps/chosen": -307.6633605957031, + "logps/rejected": -285.3849182128906, + "loss": 0.6555, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.06436268240213394, + "rewards/margins": 0.08075664192438126, + "rewards/rejected": -0.1451193392276764, + "step": 2990 + }, + { + "epoch": 1.57, + "learning_rate": 2.7333458654387344e-07, + "logits/chosen": 0.26363521814346313, + "logits/rejected": 0.26629766821861267, + "logps/chosen": -343.8567810058594, + "logps/rejected": -367.24700927734375, + "loss": 0.6641, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09303367137908936, + "rewards/margins": 0.033451493829488754, + "rewards/rejected": -0.12648515403270721, + "step": 3000 + }, + { + "epoch": 1.58, + "learning_rate": 2.718184248106828e-07, + "logits/chosen": 0.3711121380329132, + "logits/rejected": 0.3596826195716858, + "logps/chosen": -298.93804931640625, + "logps/rejected": -271.2373046875, + "loss": 0.6632, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.05417951941490173, + "rewards/margins": 0.0800536572933197, + "rewards/rejected": -0.13423319160938263, + "step": 3010 + }, + { + "epoch": 1.58, + "learning_rate": 2.7030145399884275e-07, + "logits/chosen": 0.30993330478668213, + "logits/rejected": 0.24670329689979553, + "logps/chosen": -302.9925842285156, + "logps/rejected": -254.4071807861328, + "loss": 0.6577, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10473034530878067, + "rewards/margins": 0.03766946122050285, + "rewards/rejected": -0.14239981770515442, + "step": 3020 + }, + { + "epoch": 1.59, + "learning_rate": 2.687837303612085e-07, + "logits/chosen": 0.4012434482574463, + "logits/rejected": 0.3031303286552429, + "logps/chosen": -306.01971435546875, + "logps/rejected": -336.9535827636719, + "loss": 0.6605, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.06967012584209442, + "rewards/margins": 0.10760972648859024, + "rewards/rejected": -0.17727985978126526, + "step": 3030 + }, + { + "epoch": 1.59, + "learning_rate": 2.672653101785519e-07, + "logits/chosen": 0.3572728931903839, + "logits/rejected": 0.3076297640800476, + "logps/chosen": -343.535400390625, + "logps/rejected": -291.32000732421875, + "loss": 0.655, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.06969340145587921, + "rewards/margins": 0.10455378144979477, + "rewards/rejected": -0.17424717545509338, + "step": 3040 + }, + { + "epoch": 1.6, + "learning_rate": 2.657462497574747e-07, + "logits/chosen": 0.3425232172012329, + "logits/rejected": 0.31714963912963867, + "logps/chosen": -333.19268798828125, + "logps/rejected": -256.7529296875, + "loss": 0.6586, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.05247022956609726, + "rewards/margins": 0.09439842402935028, + "rewards/rejected": -0.14686864614486694, + "step": 3050 + }, + { + "epoch": 1.6, + "learning_rate": 2.642266054283198e-07, + "logits/chosen": 0.26972079277038574, + "logits/rejected": 0.27910494804382324, + "logps/chosen": -305.31048583984375, + "logps/rejected": -318.89703369140625, + "loss": 0.6614, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.07155313342809677, + "rewards/margins": 0.03983413800597191, + "rewards/rejected": -0.11138726770877838, + "step": 3060 + }, + { + "epoch": 1.61, + "learning_rate": 2.627064335430829e-07, + "logits/chosen": 0.27563899755477905, + "logits/rejected": 0.28687483072280884, + "logps/chosen": -327.7835388183594, + "logps/rejected": -291.73126220703125, + "loss": 0.665, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.05870665982365608, + "rewards/margins": 0.11560998111963272, + "rewards/rejected": -0.1743166148662567, + "step": 3070 + }, + { + "epoch": 1.61, + "learning_rate": 2.611857904733227e-07, + "logits/chosen": 0.3899874985218048, + "logits/rejected": 0.35909098386764526, + "logps/chosen": -366.1586608886719, + "logps/rejected": -318.25494384765625, + "loss": 0.664, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.05585475638508797, + "rewards/margins": 0.08511849492788315, + "rewards/rejected": -0.14097324013710022, + "step": 3080 + }, + { + "epoch": 1.62, + "learning_rate": 2.5966473260807076e-07, + "logits/chosen": 0.28359925746917725, + "logits/rejected": 0.26255688071250916, + "logps/chosen": -339.81292724609375, + "logps/rejected": -299.61956787109375, + "loss": 0.6508, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.06393036991357803, + "rewards/margins": 0.08874894678592682, + "rewards/rejected": -0.15267930924892426, + "step": 3090 + }, + { + "epoch": 1.62, + "learning_rate": 2.5814331635173987e-07, + "logits/chosen": 0.30540210008621216, + "logits/rejected": 0.26466771960258484, + "logps/chosen": -297.3798828125, + "logps/rejected": -266.07757568359375, + "loss": 0.6599, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.10192809998989105, + "rewards/margins": 0.04608898609876633, + "rewards/rejected": -0.14801709353923798, + "step": 3100 + }, + { + "epoch": 1.63, + "learning_rate": 2.566215981220331e-07, + "logits/chosen": 0.3526119589805603, + "logits/rejected": 0.3886163532733917, + "logps/chosen": -245.1267547607422, + "logps/rejected": -252.8359832763672, + "loss": 0.6681, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.12257083505392075, + "rewards/margins": 0.012356823310256004, + "rewards/rejected": -0.1349276602268219, + "step": 3110 + }, + { + "epoch": 1.63, + "learning_rate": 2.550996343478514e-07, + "logits/chosen": 0.3826276659965515, + "logits/rejected": 0.40776434540748596, + "logps/chosen": -245.58157348632812, + "logps/rejected": -256.46234130859375, + "loss": 0.6612, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.06684736907482147, + "rewards/margins": 0.04188776761293411, + "rewards/rejected": -0.10873512923717499, + "step": 3120 + }, + { + "epoch": 1.64, + "learning_rate": 2.5357748146720076e-07, + "logits/chosen": 0.38492801785469055, + "logits/rejected": 0.3946647047996521, + "logps/chosen": -305.4632873535156, + "logps/rejected": -279.2232666015625, + "loss": 0.6527, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07274258881807327, + "rewards/margins": 0.05328000336885452, + "rewards/rejected": -0.1260225772857666, + "step": 3130 + }, + { + "epoch": 1.64, + "learning_rate": 2.5205519592509993e-07, + "logits/chosen": 0.31763720512390137, + "logits/rejected": 0.32317763566970825, + "logps/chosen": -332.72076416015625, + "logps/rejected": -300.34222412109375, + "loss": 0.6565, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.07132816314697266, + "rewards/margins": 0.08335666358470917, + "rewards/rejected": -0.15468482673168182, + "step": 3140 + }, + { + "epoch": 1.65, + "learning_rate": 2.505328341714873e-07, + "logits/chosen": 0.3209022283554077, + "logits/rejected": 0.353001207113266, + "logps/chosen": -296.9544372558594, + "logps/rejected": -305.7718200683594, + "loss": 0.6567, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08769343048334122, + "rewards/margins": 0.06560392677783966, + "rewards/rejected": -0.15329734981060028, + "step": 3150 + }, + { + "epoch": 1.65, + "learning_rate": 2.4901045265912687e-07, + "logits/chosen": 0.40290650725364685, + "logits/rejected": 0.38739943504333496, + "logps/chosen": -284.33612060546875, + "logps/rejected": -271.75872802734375, + "loss": 0.6655, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.052501898258924484, + "rewards/margins": 0.11684814840555191, + "rewards/rejected": -0.1693500280380249, + "step": 3160 + }, + { + "epoch": 1.66, + "learning_rate": 2.4748810784151555e-07, + "logits/chosen": 0.2552871108055115, + "logits/rejected": 0.2728117108345032, + "logps/chosen": -282.8292541503906, + "logps/rejected": -245.09439086914062, + "loss": 0.6584, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.07773838937282562, + "rewards/margins": 0.06995000690221786, + "rewards/rejected": -0.14768841862678528, + "step": 3170 + }, + { + "epoch": 1.66, + "learning_rate": 2.459658561707898e-07, + "logits/chosen": 0.2766292095184326, + "logits/rejected": 0.3535088002681732, + "logps/chosen": -265.8050537109375, + "logps/rejected": -232.62393188476562, + "loss": 0.6518, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11231112480163574, + "rewards/margins": 0.05461050942540169, + "rewards/rejected": -0.16692163050174713, + "step": 3180 + }, + { + "epoch": 1.67, + "learning_rate": 2.4444375409563145e-07, + "logits/chosen": 0.28582626581192017, + "logits/rejected": 0.21247181296348572, + "logps/chosen": -326.25750732421875, + "logps/rejected": -261.94793701171875, + "loss": 0.6519, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08714159578084946, + "rewards/margins": 0.09397827088832855, + "rewards/rejected": -0.1811198890209198, + "step": 3190 + }, + { + "epoch": 1.67, + "learning_rate": 2.429218580591753e-07, + "logits/chosen": 0.36546292901039124, + "logits/rejected": 0.31438449025154114, + "logps/chosen": -293.0782775878906, + "logps/rejected": -322.4738464355469, + "loss": 0.6569, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.06082568317651749, + "rewards/margins": 0.1483532190322876, + "rewards/rejected": -0.2091788798570633, + "step": 3200 + }, + { + "epoch": 1.68, + "learning_rate": 2.414002244969158e-07, + "logits/chosen": 0.26624953746795654, + "logits/rejected": 0.23065993189811707, + "logps/chosen": -330.13323974609375, + "logps/rejected": -318.6907958984375, + "loss": 0.6508, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.07344881445169449, + "rewards/margins": 0.11431051790714264, + "rewards/rejected": -0.18775932490825653, + "step": 3210 + }, + { + "epoch": 1.68, + "learning_rate": 2.3987890983461403e-07, + "logits/chosen": 0.3105958104133606, + "logits/rejected": 0.3086475729942322, + "logps/chosen": -274.396484375, + "logps/rejected": -301.52557373046875, + "loss": 0.6658, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10729696601629257, + "rewards/margins": 0.04789247363805771, + "rewards/rejected": -0.1551894247531891, + "step": 3220 + }, + { + "epoch": 1.69, + "learning_rate": 2.3835797048620564e-07, + "logits/chosen": 0.4288257956504822, + "logits/rejected": 0.31377506256103516, + "logps/chosen": -331.15008544921875, + "logps/rejected": -216.7270050048828, + "loss": 0.6496, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.066102035343647, + "rewards/margins": 0.09226211160421371, + "rewards/rejected": -0.15836414694786072, + "step": 3230 + }, + { + "epoch": 1.7, + "learning_rate": 2.368374628517088e-07, + "logits/chosen": 0.26842182874679565, + "logits/rejected": 0.23748120665550232, + "logps/chosen": -340.2176208496094, + "logps/rejected": -247.35824584960938, + "loss": 0.6574, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11225073039531708, + "rewards/margins": 0.09872641414403915, + "rewards/rejected": -0.21097715198993683, + "step": 3240 + }, + { + "epoch": 1.7, + "learning_rate": 2.3531744331513247e-07, + "logits/chosen": 0.3615303635597229, + "logits/rejected": 0.3841082453727722, + "logps/chosen": -357.51458740234375, + "logps/rejected": -380.087890625, + "loss": 0.6524, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0837298184633255, + "rewards/margins": 0.06683110445737839, + "rewards/rejected": -0.15056093037128448, + "step": 3250 + }, + { + "epoch": 1.71, + "learning_rate": 2.3379796824238608e-07, + "logits/chosen": 0.37287023663520813, + "logits/rejected": 0.330514132976532, + "logps/chosen": -304.1961975097656, + "logps/rejected": -266.56610107421875, + "loss": 0.6518, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0935707688331604, + "rewards/margins": 0.05958444997668266, + "rewards/rejected": -0.15315520763397217, + "step": 3260 + }, + { + "epoch": 1.71, + "learning_rate": 2.3227909397918894e-07, + "logits/chosen": 0.33427393436431885, + "logits/rejected": 0.24959492683410645, + "logps/chosen": -320.3334045410156, + "logps/rejected": -285.8636169433594, + "loss": 0.6577, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.044574957340955734, + "rewards/margins": 0.08377736806869507, + "rewards/rejected": -0.1283523142337799, + "step": 3270 + }, + { + "epoch": 1.72, + "learning_rate": 2.3076087684898076e-07, + "logits/chosen": 0.24056890606880188, + "logits/rejected": 0.22842903435230255, + "logps/chosen": -336.13067626953125, + "logps/rejected": -277.524658203125, + "loss": 0.6559, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08839359879493713, + "rewards/margins": 0.13642925024032593, + "rewards/rejected": -0.22482284903526306, + "step": 3280 + }, + { + "epoch": 1.72, + "learning_rate": 2.2924337315083353e-07, + "logits/chosen": 0.3178374767303467, + "logits/rejected": 0.24835416674613953, + "logps/chosen": -421.72589111328125, + "logps/rejected": -324.20416259765625, + "loss": 0.659, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.07225541770458221, + "rewards/margins": 0.11300001293420792, + "rewards/rejected": -0.18525540828704834, + "step": 3290 + }, + { + "epoch": 1.73, + "learning_rate": 2.277266391573633e-07, + "logits/chosen": 0.20869994163513184, + "logits/rejected": 0.2740298807621002, + "logps/chosen": -368.10650634765625, + "logps/rejected": -339.2476501464844, + "loss": 0.6633, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.05671432614326477, + "rewards/margins": 0.06923334300518036, + "rewards/rejected": -0.12594766914844513, + "step": 3300 + }, + { + "epoch": 1.73, + "learning_rate": 2.2621073111264357e-07, + "logits/chosen": 0.2960966229438782, + "logits/rejected": 0.27994006872177124, + "logps/chosen": -285.3283386230469, + "logps/rejected": -246.00830078125, + "loss": 0.6598, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08893431723117828, + "rewards/margins": 0.055390290915966034, + "rewards/rejected": -0.14432461559772491, + "step": 3310 + }, + { + "epoch": 1.74, + "learning_rate": 2.2469570523011993e-07, + "logits/chosen": 0.28653472661972046, + "logits/rejected": 0.25573134422302246, + "logps/chosen": -274.28167724609375, + "logps/rejected": -297.33184814453125, + "loss": 0.6671, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.11661437898874283, + "rewards/margins": 0.023711198940873146, + "rewards/rejected": -0.14032557606697083, + "step": 3320 + }, + { + "epoch": 1.74, + "learning_rate": 2.2318161769052525e-07, + "logits/chosen": 0.30002620816230774, + "logits/rejected": 0.29848071932792664, + "logps/chosen": -354.6106872558594, + "logps/rejected": -291.1500244140625, + "loss": 0.662, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11448033154010773, + "rewards/margins": 0.07472564280033112, + "rewards/rejected": -0.18920597434043884, + "step": 3330 + }, + { + "epoch": 1.75, + "learning_rate": 2.2166852463979624e-07, + "logits/chosen": 0.3476138710975647, + "logits/rejected": 0.3336217999458313, + "logps/chosen": -331.3888244628906, + "logps/rejected": -294.93829345703125, + "loss": 0.6529, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.07934984564781189, + "rewards/margins": 0.09157811850309372, + "rewards/rejected": -0.1709279716014862, + "step": 3340 + }, + { + "epoch": 1.75, + "learning_rate": 2.20156482186992e-07, + "logits/chosen": 0.2583394944667816, + "logits/rejected": 0.27358004450798035, + "logps/chosen": -300.673828125, + "logps/rejected": -256.2142028808594, + "loss": 0.651, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0521802194416523, + "rewards/margins": 0.16437430679798126, + "rewards/rejected": -0.21655452251434326, + "step": 3350 + }, + { + "epoch": 1.76, + "learning_rate": 2.1864554640221244e-07, + "logits/chosen": 0.3455668091773987, + "logits/rejected": 0.3013859987258911, + "logps/chosen": -386.6537170410156, + "logps/rejected": -359.4359130859375, + "loss": 0.6556, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10155276954174042, + "rewards/margins": 0.06396753340959549, + "rewards/rejected": -0.1655203104019165, + "step": 3360 + }, + { + "epoch": 1.76, + "learning_rate": 2.1713577331452016e-07, + "logits/chosen": 0.246551513671875, + "logits/rejected": 0.21105051040649414, + "logps/chosen": -338.63525390625, + "logps/rejected": -307.242919921875, + "loss": 0.6525, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0833117812871933, + "rewards/margins": 0.12226946651935577, + "rewards/rejected": -0.20558123290538788, + "step": 3370 + }, + { + "epoch": 1.77, + "learning_rate": 2.1562721890986199e-07, + "logits/chosen": 0.3521527945995331, + "logits/rejected": 0.32254111766815186, + "logps/chosen": -271.57318115234375, + "logps/rejected": -228.3287353515625, + "loss": 0.6581, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.08410477638244629, + "rewards/margins": 0.09148812294006348, + "rewards/rejected": -0.17559289932250977, + "step": 3380 + }, + { + "epoch": 1.77, + "learning_rate": 2.1411993912899285e-07, + "logits/chosen": 0.29545360803604126, + "logits/rejected": 0.29878222942352295, + "logps/chosen": -256.453125, + "logps/rejected": -274.4709777832031, + "loss": 0.6481, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0756291002035141, + "rewards/margins": 0.09634210169315338, + "rewards/rejected": -0.1719711869955063, + "step": 3390 + }, + { + "epoch": 1.78, + "learning_rate": 2.126139898654021e-07, + "logits/chosen": 0.38120537996292114, + "logits/rejected": 0.37205177545547485, + "logps/chosen": -398.5644226074219, + "logps/rejected": -280.35357666015625, + "loss": 0.6443, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0817885547876358, + "rewards/margins": 0.09885601699352264, + "rewards/rejected": -0.18064458668231964, + "step": 3400 + }, + { + "epoch": 1.78, + "learning_rate": 2.1110942696324012e-07, + "logits/chosen": 0.2960183024406433, + "logits/rejected": 0.29262787103652954, + "logps/chosen": -383.98211669921875, + "logps/rejected": -315.3439025878906, + "loss": 0.6545, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.07995359599590302, + "rewards/margins": 0.11479581892490387, + "rewards/rejected": -0.19474941492080688, + "step": 3410 + }, + { + "epoch": 1.79, + "learning_rate": 2.0960630621524762e-07, + "logits/chosen": 0.30641573667526245, + "logits/rejected": 0.31198835372924805, + "logps/chosen": -352.9584655761719, + "logps/rejected": -342.90496826171875, + "loss": 0.65, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0978015661239624, + "rewards/margins": 0.08735918253660202, + "rewards/rejected": -0.1851607710123062, + "step": 3420 + }, + { + "epoch": 1.79, + "learning_rate": 2.0810468336068697e-07, + "logits/chosen": 0.26992154121398926, + "logits/rejected": 0.27308109402656555, + "logps/chosen": -315.85833740234375, + "logps/rejected": -311.920166015625, + "loss": 0.6574, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08393695205450058, + "rewards/margins": 0.09377161413431168, + "rewards/rejected": -0.17770855128765106, + "step": 3430 + }, + { + "epoch": 1.8, + "learning_rate": 2.0660461408327535e-07, + "logits/chosen": 0.31459373235702515, + "logits/rejected": 0.32342398166656494, + "logps/chosen": -238.4505157470703, + "logps/rejected": -222.8074951171875, + "loss": 0.6567, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08796955645084381, + "rewards/margins": 0.10214855521917343, + "rewards/rejected": -0.19011810421943665, + "step": 3440 + }, + { + "epoch": 1.81, + "learning_rate": 2.0510615400911906e-07, + "logits/chosen": 0.23679859936237335, + "logits/rejected": 0.21291649341583252, + "logps/chosen": -318.44915771484375, + "logps/rejected": -316.59967041015625, + "loss": 0.6603, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09217986464500427, + "rewards/margins": 0.05333445593714714, + "rewards/rejected": -0.14551430940628052, + "step": 3450 + }, + { + "epoch": 1.81, + "learning_rate": 2.0360935870465185e-07, + "logits/chosen": 0.38853684067726135, + "logits/rejected": 0.3678573668003082, + "logps/chosen": -260.76263427734375, + "logps/rejected": -278.4898986816406, + "loss": 0.6557, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10340939462184906, + "rewards/margins": 0.0924212783575058, + "rewards/rejected": -0.19583067297935486, + "step": 3460 + }, + { + "epoch": 1.82, + "learning_rate": 2.021142836745739e-07, + "logits/chosen": 0.3437570333480835, + "logits/rejected": 0.3541465699672699, + "logps/chosen": -264.78924560546875, + "logps/rejected": -235.6223907470703, + "loss": 0.6558, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08142450451850891, + "rewards/margins": 0.1238001137971878, + "rewards/rejected": -0.2052246332168579, + "step": 3470 + }, + { + "epoch": 1.82, + "learning_rate": 2.0062098435979308e-07, + "logits/chosen": 0.28070321679115295, + "logits/rejected": 0.30005306005477905, + "logps/chosen": -326.1820983886719, + "logps/rejected": -315.1957702636719, + "loss": 0.6532, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.13556493818759918, + "rewards/margins": 0.06753392517566681, + "rewards/rejected": -0.2030988484621048, + "step": 3480 + }, + { + "epoch": 1.83, + "learning_rate": 1.9912951613536997e-07, + "logits/chosen": 0.3095873296260834, + "logits/rejected": 0.2897317111492157, + "logps/chosen": -317.66412353515625, + "logps/rejected": -253.30819702148438, + "loss": 0.6447, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0781150683760643, + "rewards/margins": 0.1319970190525055, + "rewards/rejected": -0.2101120948791504, + "step": 3490 + }, + { + "epoch": 1.83, + "learning_rate": 1.9763993430846392e-07, + "logits/chosen": 0.3042958676815033, + "logits/rejected": 0.2619950771331787, + "logps/chosen": -341.4963073730469, + "logps/rejected": -233.6422882080078, + "loss": 0.6525, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10992898792028427, + "rewards/margins": 0.10214383900165558, + "rewards/rejected": -0.21207281947135925, + "step": 3500 + }, + { + "epoch": 1.84, + "learning_rate": 1.9615229411628212e-07, + "logits/chosen": 0.35564860701560974, + "logits/rejected": 0.3918618857860565, + "logps/chosen": -175.1331329345703, + "logps/rejected": -214.7126922607422, + "loss": 0.6595, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.12910182774066925, + "rewards/margins": 0.055904828011989594, + "rewards/rejected": -0.18500666320323944, + "step": 3510 + }, + { + "epoch": 1.84, + "learning_rate": 1.946666507240314e-07, + "logits/chosen": 0.2995254397392273, + "logits/rejected": 0.2703678011894226, + "logps/chosen": -272.8249206542969, + "logps/rejected": -238.4146270751953, + "loss": 0.6575, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.09808726608753204, + "rewards/margins": 0.04268048703670502, + "rewards/rejected": -0.14076772332191467, + "step": 3520 + }, + { + "epoch": 1.85, + "learning_rate": 1.9318305922287268e-07, + "logits/chosen": 0.3370177149772644, + "logits/rejected": 0.2872919738292694, + "logps/chosen": -335.13922119140625, + "logps/rejected": -325.77545166015625, + "loss": 0.6583, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09754248708486557, + "rewards/margins": 0.11342176049947739, + "rewards/rejected": -0.21096424758434296, + "step": 3530 + }, + { + "epoch": 1.85, + "learning_rate": 1.9170157462787762e-07, + "logits/chosen": 0.29330548644065857, + "logits/rejected": 0.2378358542919159, + "logps/chosen": -320.5285949707031, + "logps/rejected": -303.8361511230469, + "loss": 0.6629, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.08726056665182114, + "rewards/margins": 0.06550947576761246, + "rewards/rejected": -0.1527700424194336, + "step": 3540 + }, + { + "epoch": 1.86, + "learning_rate": 1.902222518759891e-07, + "logits/chosen": 0.2827271819114685, + "logits/rejected": 0.3013695180416107, + "logps/chosen": -354.85955810546875, + "logps/rejected": -337.86419677734375, + "loss": 0.6561, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.09215109795331955, + "rewards/margins": 0.1384793072938919, + "rewards/rejected": -0.23063039779663086, + "step": 3550 + }, + { + "epoch": 1.86, + "learning_rate": 1.8874514582398368e-07, + "logits/chosen": 0.29755842685699463, + "logits/rejected": 0.3269795775413513, + "logps/chosen": -318.353271484375, + "logps/rejected": -308.84576416015625, + "loss": 0.6531, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09082455933094025, + "rewards/margins": 0.10677800327539444, + "rewards/rejected": -0.19760257005691528, + "step": 3560 + }, + { + "epoch": 1.87, + "learning_rate": 1.8727031124643738e-07, + "logits/chosen": 0.303489625453949, + "logits/rejected": 0.3056088984012604, + "logps/chosen": -327.00250244140625, + "logps/rejected": -273.66448974609375, + "loss": 0.6551, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0742236077785492, + "rewards/margins": 0.16267745196819305, + "rewards/rejected": -0.23690101504325867, + "step": 3570 + }, + { + "epoch": 1.87, + "learning_rate": 1.8579780283369472e-07, + "logits/chosen": 0.32123714685440063, + "logits/rejected": 0.350780189037323, + "logps/chosen": -296.42919921875, + "logps/rejected": -271.38922119140625, + "loss": 0.6451, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1357162892818451, + "rewards/margins": 0.055894482880830765, + "rewards/rejected": -0.19161078333854675, + "step": 3580 + }, + { + "epoch": 1.88, + "learning_rate": 1.8432767518984043e-07, + "logits/chosen": 0.3722311854362488, + "logits/rejected": 0.3453969955444336, + "logps/chosen": -301.4832458496094, + "logps/rejected": -240.32846069335938, + "loss": 0.6503, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.13313955068588257, + "rewards/margins": 0.09740877896547318, + "rewards/rejected": -0.23054830729961395, + "step": 3590 + }, + { + "epoch": 1.88, + "learning_rate": 1.8285998283067478e-07, + "logits/chosen": 0.24569134414196014, + "logits/rejected": 0.2697654366493225, + "logps/chosen": -363.0978698730469, + "logps/rejected": -371.66009521484375, + "loss": 0.6551, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.06352634727954865, + "rewards/margins": 0.108018659055233, + "rewards/rejected": -0.17154501378536224, + "step": 3600 + }, + { + "epoch": 1.89, + "learning_rate": 1.8139478018169197e-07, + "logits/chosen": 0.3788035213947296, + "logits/rejected": 0.31163662672042847, + "logps/chosen": -310.60528564453125, + "logps/rejected": -234.04910278320312, + "loss": 0.6549, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.047023043036460876, + "rewards/margins": 0.1184474378824234, + "rewards/rejected": -0.16547051072120667, + "step": 3610 + }, + { + "epoch": 1.89, + "learning_rate": 1.799321215760617e-07, + "logits/chosen": 0.23150837421417236, + "logits/rejected": 0.23608848452568054, + "logps/chosen": -341.4847717285156, + "logps/rejected": -318.3338928222656, + "loss": 0.6492, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.11050182580947876, + "rewards/margins": 0.09023106098175049, + "rewards/rejected": -0.20073287189006805, + "step": 3620 + }, + { + "epoch": 1.9, + "learning_rate": 1.7847206125261476e-07, + "logits/chosen": 0.24327079951763153, + "logits/rejected": 0.22404679656028748, + "logps/chosen": -296.234619140625, + "logps/rejected": -340.331298828125, + "loss": 0.6503, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12481401860713959, + "rewards/margins": 0.09092869609594345, + "rewards/rejected": -0.21574273705482483, + "step": 3630 + }, + { + "epoch": 1.9, + "learning_rate": 1.7701465335383148e-07, + "logits/chosen": 0.37283509969711304, + "logits/rejected": 0.30105775594711304, + "logps/chosen": -293.0348205566406, + "logps/rejected": -243.2663116455078, + "loss": 0.6599, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.12276791036128998, + "rewards/margins": 0.11833088099956512, + "rewards/rejected": -0.2410987913608551, + "step": 3640 + }, + { + "epoch": 1.91, + "learning_rate": 1.7555995192383377e-07, + "logits/chosen": 0.26368245482444763, + "logits/rejected": 0.2792828381061554, + "logps/chosen": -277.477294921875, + "logps/rejected": -258.3664245605469, + "loss": 0.653, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1236424446105957, + "rewards/margins": 0.10521407425403595, + "rewards/rejected": -0.22885651886463165, + "step": 3650 + }, + { + "epoch": 1.92, + "learning_rate": 1.7410801090638166e-07, + "logits/chosen": 0.18897534906864166, + "logits/rejected": 0.2537173330783844, + "logps/chosen": -389.4532165527344, + "logps/rejected": -336.89990234375, + "loss": 0.6551, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0968824028968811, + "rewards/margins": 0.10752584040164948, + "rewards/rejected": -0.20440824329853058, + "step": 3660 + }, + { + "epoch": 1.92, + "learning_rate": 1.7265888414287245e-07, + "logits/chosen": 0.2385127991437912, + "logits/rejected": 0.22628657519817352, + "logps/chosen": -323.94769287109375, + "logps/rejected": -265.17169189453125, + "loss": 0.6509, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.16426163911819458, + "rewards/margins": 0.11096008121967316, + "rewards/rejected": -0.27522170543670654, + "step": 3670 + }, + { + "epoch": 1.93, + "learning_rate": 1.7121262537034396e-07, + "logits/chosen": 0.2674527168273926, + "logits/rejected": 0.2606186270713806, + "logps/chosen": -376.69281005859375, + "logps/rejected": -302.48626708984375, + "loss": 0.66, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1338208019733429, + "rewards/margins": 0.10959096997976303, + "rewards/rejected": -0.24341173470020294, + "step": 3680 + }, + { + "epoch": 1.93, + "learning_rate": 1.697692882194826e-07, + "logits/chosen": 0.3283900320529938, + "logits/rejected": 0.3107720613479614, + "logps/chosen": -319.2773742675781, + "logps/rejected": -296.90130615234375, + "loss": 0.6556, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09138993918895721, + "rewards/margins": 0.09348934888839722, + "rewards/rejected": -0.18487928807735443, + "step": 3690 + }, + { + "epoch": 1.94, + "learning_rate": 1.6832892621263406e-07, + "logits/chosen": 0.2618725597858429, + "logits/rejected": 0.22988107800483704, + "logps/chosen": -348.5962829589844, + "logps/rejected": -358.07794189453125, + "loss": 0.6544, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.14753267168998718, + "rewards/margins": 0.05720914527773857, + "rewards/rejected": -0.20474180579185486, + "step": 3700 + }, + { + "epoch": 1.94, + "learning_rate": 1.668915927618183e-07, + "logits/chosen": 0.29424089193344116, + "logits/rejected": 0.3408924639225006, + "logps/chosen": -281.04449462890625, + "logps/rejected": -266.5536804199219, + "loss": 0.657, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1075427383184433, + "rewards/margins": 0.053296517580747604, + "rewards/rejected": -0.1608392596244812, + "step": 3710 + }, + { + "epoch": 1.95, + "learning_rate": 1.6545734116674965e-07, + "logits/chosen": 0.2831776738166809, + "logits/rejected": 0.296794056892395, + "logps/chosen": -335.51214599609375, + "logps/rejected": -404.1058654785156, + "loss": 0.6519, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.14560511708259583, + "rewards/margins": 0.07225475460290909, + "rewards/rejected": -0.2178598940372467, + "step": 3720 + }, + { + "epoch": 1.95, + "learning_rate": 1.6402622461286e-07, + "logits/chosen": 0.305799663066864, + "logits/rejected": 0.3058302700519562, + "logps/chosen": -323.1365661621094, + "logps/rejected": -298.42730712890625, + "loss": 0.6494, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.13549456000328064, + "rewards/margins": 0.08603169769048691, + "rewards/rejected": -0.22152626514434814, + "step": 3730 + }, + { + "epoch": 1.96, + "learning_rate": 1.625982961693262e-07, + "logits/chosen": 0.2127005159854889, + "logits/rejected": 0.20395474135875702, + "logps/chosen": -293.8401794433594, + "logps/rejected": -282.7629699707031, + "loss": 0.655, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.1535455286502838, + "rewards/margins": 0.06461331248283386, + "rewards/rejected": -0.21815884113311768, + "step": 3740 + }, + { + "epoch": 1.96, + "learning_rate": 1.6117360878710266e-07, + "logits/chosen": 0.26588043570518494, + "logits/rejected": 0.25278085470199585, + "logps/chosen": -299.72467041015625, + "logps/rejected": -284.97003173828125, + "loss": 0.6608, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.15774992108345032, + "rewards/margins": 0.09110499918460846, + "rewards/rejected": -0.24885492026805878, + "step": 3750 + }, + { + "epoch": 1.97, + "learning_rate": 1.5975221529695773e-07, + "logits/chosen": 0.27109450101852417, + "logits/rejected": 0.27076664566993713, + "logps/chosen": -289.7879638671875, + "logps/rejected": -277.06243896484375, + "loss": 0.6549, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11164456605911255, + "rewards/margins": 0.06714334338903427, + "rewards/rejected": -0.17878788709640503, + "step": 3760 + }, + { + "epoch": 1.97, + "learning_rate": 1.5833416840751406e-07, + "logits/chosen": 0.22059431672096252, + "logits/rejected": 0.23375996947288513, + "logps/chosen": -327.8553161621094, + "logps/rejected": -280.8095397949219, + "loss": 0.6518, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10653182119131088, + "rewards/margins": 0.07202602922916412, + "rewards/rejected": -0.1785578578710556, + "step": 3770 + }, + { + "epoch": 1.98, + "learning_rate": 1.5691952070329493e-07, + "logits/chosen": 0.2996228337287903, + "logits/rejected": 0.33212026953697205, + "logps/chosen": -313.38653564453125, + "logps/rejected": -257.5924072265625, + "loss": 0.6558, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.11817840486764908, + "rewards/margins": 0.08965203911066055, + "rewards/rejected": -0.20783045887947083, + "step": 3780 + }, + { + "epoch": 1.98, + "learning_rate": 1.555083246427734e-07, + "logits/chosen": 0.21885398030281067, + "logits/rejected": 0.29226353764533997, + "logps/chosen": -264.7387390136719, + "logps/rejected": -311.4655456542969, + "loss": 0.6444, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.12575462460517883, + "rewards/margins": 0.11083336919546127, + "rewards/rejected": -0.2365880012512207, + "step": 3790 + }, + { + "epoch": 1.99, + "learning_rate": 1.5410063255642767e-07, + "logits/chosen": 0.3748754858970642, + "logits/rejected": 0.32010817527770996, + "logps/chosen": -295.5055847167969, + "logps/rejected": -278.5317687988281, + "loss": 0.6579, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10374332964420319, + "rewards/margins": 0.1423131227493286, + "rewards/rejected": -0.24605640769004822, + "step": 3800 + }, + { + "epoch": 1.99, + "learning_rate": 1.5269649664480037e-07, + "logits/chosen": 0.1914290487766266, + "logits/rejected": 0.2500323951244354, + "logps/chosen": -292.2693786621094, + "logps/rejected": -354.45672607421875, + "loss": 0.6727, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.16124172508716583, + "rewards/margins": 0.0014358393382281065, + "rewards/rejected": -0.1626775562763214, + "step": 3810 + }, + { + "epoch": 2.0, + "learning_rate": 1.5129596897656255e-07, + "logits/chosen": 0.30393776297569275, + "logits/rejected": 0.3444197177886963, + "logps/chosen": -329.9333190917969, + "logps/rejected": -295.95538330078125, + "loss": 0.6499, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10849185287952423, + "rewards/margins": 0.10884352028369904, + "rewards/rejected": -0.21733537316322327, + "step": 3820 + }, + { + "epoch": 2.0, + "learning_rate": 1.4989910148658324e-07, + "logits/chosen": 0.24706992506980896, + "logits/rejected": 0.261945903301239, + "logps/chosen": -303.5214538574219, + "logps/rejected": -291.674072265625, + "loss": 0.6564, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11958847939968109, + "rewards/margins": 0.09228087961673737, + "rewards/rejected": -0.21186935901641846, + "step": 3830 + }, + { + "epoch": 2.01, + "learning_rate": 1.485059459740035e-07, + "logits/chosen": 0.1678394377231598, + "logits/rejected": 0.15751591324806213, + "logps/chosen": -323.06304931640625, + "logps/rejected": -313.9187316894531, + "loss": 0.6445, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.18193499743938446, + "rewards/margins": 0.07315002381801605, + "rewards/rejected": -0.2550850212574005, + "step": 3840 + }, + { + "epoch": 2.01, + "learning_rate": 1.4711655410031536e-07, + "logits/chosen": 0.28335142135620117, + "logits/rejected": 0.3012840151786804, + "logps/chosen": -286.52984619140625, + "logps/rejected": -262.88568115234375, + "loss": 0.6503, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10715119540691376, + "rewards/margins": 0.12121255695819855, + "rewards/rejected": -0.2283637523651123, + "step": 3850 + }, + { + "epoch": 2.02, + "learning_rate": 1.4573097738744623e-07, + "logits/chosen": 0.2741647958755493, + "logits/rejected": 0.2756713628768921, + "logps/chosen": -360.9956359863281, + "logps/rejected": -328.37017822265625, + "loss": 0.6633, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.13849574327468872, + "rewards/margins": 0.07003328949213028, + "rewards/rejected": -0.2085290253162384, + "step": 3860 + }, + { + "epoch": 2.03, + "learning_rate": 1.4434926721584865e-07, + "logits/chosen": 0.34434953331947327, + "logits/rejected": 0.35850682854652405, + "logps/chosen": -274.724853515625, + "logps/rejected": -265.60101318359375, + "loss": 0.6545, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.13894551992416382, + "rewards/margins": 0.06858086585998535, + "rewards/rejected": -0.20752640068531036, + "step": 3870 + }, + { + "epoch": 2.03, + "learning_rate": 1.4297147482259424e-07, + "logits/chosen": 0.3271617293357849, + "logits/rejected": 0.3250243067741394, + "logps/chosen": -335.4681091308594, + "logps/rejected": -310.607421875, + "loss": 0.6536, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.13369736075401306, + "rewards/margins": 0.0888337790966034, + "rewards/rejected": -0.22253112494945526, + "step": 3880 + }, + { + "epoch": 2.04, + "learning_rate": 1.4159765129947443e-07, + "logits/chosen": 0.28231456875801086, + "logits/rejected": 0.2409648895263672, + "logps/chosen": -318.02203369140625, + "logps/rejected": -273.2533874511719, + "loss": 0.6575, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.10755988210439682, + "rewards/margins": 0.09727488458156586, + "rewards/rejected": -0.20483477413654327, + "step": 3890 + }, + { + "epoch": 2.04, + "learning_rate": 1.4022784759110576e-07, + "logits/chosen": 0.2656182050704956, + "logits/rejected": 0.2702781558036804, + "logps/chosen": -278.57379150390625, + "logps/rejected": -280.07281494140625, + "loss": 0.6533, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.15303316712379456, + "rewards/margins": 0.10009218752384186, + "rewards/rejected": -0.2531253397464752, + "step": 3900 + }, + { + "epoch": 2.05, + "learning_rate": 1.3886211449304002e-07, + "logits/chosen": 0.2852250933647156, + "logits/rejected": 0.28865867853164673, + "logps/chosen": -320.269775390625, + "logps/rejected": -292.2712707519531, + "loss": 0.646, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.12158701568841934, + "rewards/margins": 0.09683831036090851, + "rewards/rejected": -0.21842531859874725, + "step": 3910 + }, + { + "epoch": 2.05, + "learning_rate": 1.3750050264988172e-07, + "logits/chosen": 0.31218335032463074, + "logits/rejected": 0.3479720950126648, + "logps/chosen": -265.9737854003906, + "logps/rejected": -305.076171875, + "loss": 0.6711, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.13722750544548035, + "rewards/margins": 0.03017706237733364, + "rewards/rejected": -0.16740456223487854, + "step": 3920 + }, + { + "epoch": 2.06, + "learning_rate": 1.3614306255340918e-07, + "logits/chosen": 0.23540154099464417, + "logits/rejected": 0.26212871074676514, + "logps/chosen": -324.95550537109375, + "logps/rejected": -311.8691101074219, + "loss": 0.6534, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1239587664604187, + "rewards/margins": 0.09770622104406357, + "rewards/rejected": -0.22166499495506287, + "step": 3930 + }, + { + "epoch": 2.06, + "learning_rate": 1.347898445407027e-07, + "logits/chosen": 0.3467102646827698, + "logits/rejected": 0.36723384261131287, + "logps/chosen": -361.93487548828125, + "logps/rejected": -322.19219970703125, + "loss": 0.6581, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.14353439211845398, + "rewards/margins": 0.07150904834270477, + "rewards/rejected": -0.21504342555999756, + "step": 3940 + }, + { + "epoch": 2.07, + "learning_rate": 1.3344089879227768e-07, + "logits/chosen": 0.3199073374271393, + "logits/rejected": 0.3352632522583008, + "logps/chosen": -267.867919921875, + "logps/rejected": -305.97332763671875, + "loss": 0.6583, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.11239242553710938, + "rewards/margins": 0.11769165843725204, + "rewards/rejected": -0.23008409142494202, + "step": 3950 + }, + { + "epoch": 2.07, + "learning_rate": 1.3209627533022393e-07, + "logits/chosen": 0.15640456974506378, + "logits/rejected": 0.1485460102558136, + "logps/chosen": -339.8058166503906, + "logps/rejected": -301.31085205078125, + "loss": 0.6564, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1585397869348526, + "rewards/margins": 0.0327836349606514, + "rewards/rejected": -0.1913234144449234, + "step": 3960 + }, + { + "epoch": 2.08, + "learning_rate": 1.3075602401635056e-07, + "logits/chosen": 0.4258531630039215, + "logits/rejected": 0.3634311556816101, + "logps/chosen": -379.2318420410156, + "logps/rejected": -312.0608215332031, + "loss": 0.6613, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09157432615756989, + "rewards/margins": 0.12343801558017731, + "rewards/rejected": -0.2150123119354248, + "step": 3970 + }, + { + "epoch": 2.08, + "learning_rate": 1.2942019455033715e-07, + "logits/chosen": 0.2574451267719269, + "logits/rejected": 0.2524639666080475, + "logps/chosen": -277.36871337890625, + "logps/rejected": -278.53826904296875, + "loss": 0.6593, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.13917557895183563, + "rewards/margins": 0.034988995641469955, + "rewards/rejected": -0.1741645783185959, + "step": 3980 + }, + { + "epoch": 2.09, + "learning_rate": 1.2808883646789088e-07, + "logits/chosen": 0.3025432229042053, + "logits/rejected": 0.2939312160015106, + "logps/chosen": -272.0987243652344, + "logps/rejected": -242.9075164794922, + "loss": 0.6586, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.10569024085998535, + "rewards/margins": 0.07147324085235596, + "rewards/rejected": -0.1771634817123413, + "step": 3990 + }, + { + "epoch": 2.09, + "learning_rate": 1.2676199913890933e-07, + "logits/chosen": 0.32760852575302124, + "logits/rejected": 0.2918204367160797, + "logps/chosen": -307.29669189453125, + "logps/rejected": -285.5381164550781, + "loss": 0.65, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.13056454062461853, + "rewards/margins": 0.0760079026222229, + "rewards/rejected": -0.20657244324684143, + "step": 4000 + }, + { + "epoch": 2.1, + "learning_rate": 1.2543973176565012e-07, + "logits/chosen": 0.2552763521671295, + "logits/rejected": 0.27753502130508423, + "logps/chosen": -328.93402099609375, + "logps/rejected": -296.59820556640625, + "loss": 0.6489, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.11348460614681244, + "rewards/margins": 0.07869541645050049, + "rewards/rejected": -0.19218002259731293, + "step": 4010 + }, + { + "epoch": 2.1, + "learning_rate": 1.2412208338090565e-07, + "logits/chosen": 0.32259517908096313, + "logits/rejected": 0.34156057238578796, + "logps/chosen": -317.1317138671875, + "logps/rejected": -321.1855163574219, + "loss": 0.6582, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1507299840450287, + "rewards/margins": 0.08442052453756332, + "rewards/rejected": -0.2351505309343338, + "step": 4020 + }, + { + "epoch": 2.11, + "learning_rate": 1.228091028461858e-07, + "logits/chosen": 0.3298514783382416, + "logits/rejected": 0.3019588887691498, + "logps/chosen": -279.78802490234375, + "logps/rejected": -285.08465576171875, + "loss": 0.6508, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.17347638309001923, + "rewards/margins": 0.06616106629371643, + "rewards/rejected": -0.23963744938373566, + "step": 4030 + }, + { + "epoch": 2.11, + "learning_rate": 1.2150083884990536e-07, + "logits/chosen": 0.1470915973186493, + "logits/rejected": 0.19251108169555664, + "logps/chosen": -348.84246826171875, + "logps/rejected": -342.70941162109375, + "loss": 0.6618, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.13458314538002014, + "rewards/margins": 0.08001428842544556, + "rewards/rejected": -0.2145974189043045, + "step": 4040 + }, + { + "epoch": 2.12, + "learning_rate": 1.201973399055788e-07, + "logits/chosen": 0.22867533564567566, + "logits/rejected": 0.2250591516494751, + "logps/chosen": -297.4351501464844, + "logps/rejected": -297.4842529296875, + "loss": 0.6515, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.13157221674919128, + "rewards/margins": 0.08421944081783295, + "rewards/rejected": -0.21579165756702423, + "step": 4050 + }, + { + "epoch": 2.12, + "learning_rate": 1.1889865435002117e-07, + "logits/chosen": 0.23430044949054718, + "logits/rejected": 0.22815270721912384, + "logps/chosen": -323.7678527832031, + "logps/rejected": -262.4975891113281, + "loss": 0.6544, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.11733143031597137, + "rewards/margins": 0.09236739575862885, + "rewards/rejected": -0.20969882607460022, + "step": 4060 + }, + { + "epoch": 2.13, + "learning_rate": 1.1760483034155588e-07, + "logits/chosen": 0.25802451372146606, + "logits/rejected": 0.29646921157836914, + "logps/chosen": -344.3500061035156, + "logps/rejected": -366.99029541015625, + "loss": 0.6516, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.16074565052986145, + "rewards/margins": 0.05782736465334892, + "rewards/rejected": -0.21857304871082306, + "step": 4070 + }, + { + "epoch": 2.14, + "learning_rate": 1.163159158582284e-07, + "logits/chosen": 0.27491092681884766, + "logits/rejected": 0.29113298654556274, + "logps/chosen": -289.4082946777344, + "logps/rejected": -281.1566467285156, + "loss": 0.6498, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.12462715059518814, + "rewards/margins": 0.082103431224823, + "rewards/rejected": -0.20673055946826935, + "step": 4080 + }, + { + "epoch": 2.14, + "learning_rate": 1.1503195869602766e-07, + "logits/chosen": 0.35794973373413086, + "logits/rejected": 0.3533916771411896, + "logps/chosen": -293.63299560546875, + "logps/rejected": -263.76715087890625, + "loss": 0.6564, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.16337043046951294, + "rewards/margins": 0.029357850551605225, + "rewards/rejected": -0.19272826611995697, + "step": 4090 + }, + { + "epoch": 2.15, + "learning_rate": 1.137530064671135e-07, + "logits/chosen": 0.27308765053749084, + "logits/rejected": 0.28640851378440857, + "logps/chosen": -334.1259765625, + "logps/rejected": -294.29638671875, + "loss": 0.6408, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.10533700883388519, + "rewards/margins": 0.09341181814670563, + "rewards/rejected": -0.19874884188175201, + "step": 4100 + }, + { + "epoch": 2.15, + "learning_rate": 1.1247910659805063e-07, + "logits/chosen": 0.27220582962036133, + "logits/rejected": 0.23189368844032288, + "logps/chosen": -309.3544616699219, + "logps/rejected": -294.084716796875, + "loss": 0.6583, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.17769691348075867, + "rewards/margins": 0.08467571437358856, + "rewards/rejected": -0.2623726427555084, + "step": 4110 + }, + { + "epoch": 2.16, + "learning_rate": 1.112103063280509e-07, + "logits/chosen": 0.18239018321037292, + "logits/rejected": 0.19460837543010712, + "logps/chosen": -306.71697998046875, + "logps/rejected": -295.2325744628906, + "loss": 0.6573, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.15410485863685608, + "rewards/margins": 0.06368996202945709, + "rewards/rejected": -0.21779482066631317, + "step": 4120 + }, + { + "epoch": 2.16, + "learning_rate": 1.099466527072207e-07, + "logits/chosen": 0.3449271023273468, + "logits/rejected": 0.3305651545524597, + "logps/chosen": -257.5208435058594, + "logps/rejected": -210.81307983398438, + "loss": 0.6487, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.13710053265094757, + "rewards/margins": 0.09004830569028854, + "rewards/rejected": -0.2271488457918167, + "step": 4130 + }, + { + "epoch": 2.17, + "learning_rate": 1.0868819259481638e-07, + "logits/chosen": 0.3679484724998474, + "logits/rejected": 0.2941407561302185, + "logps/chosen": -334.38861083984375, + "logps/rejected": -252.18588256835938, + "loss": 0.6584, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10539355129003525, + "rewards/margins": 0.1011449545621872, + "rewards/rejected": -0.20653851330280304, + "step": 4140 + }, + { + "epoch": 2.17, + "learning_rate": 1.0743497265750701e-07, + "logits/chosen": 0.33962732553482056, + "logits/rejected": 0.3592199683189392, + "logps/chosen": -265.0174255371094, + "logps/rejected": -267.397705078125, + "loss": 0.647, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.14396080374717712, + "rewards/margins": 0.08976936340332031, + "rewards/rejected": -0.23373015224933624, + "step": 4150 + }, + { + "epoch": 2.18, + "learning_rate": 1.0618703936764359e-07, + "logits/chosen": 0.35714811086654663, + "logits/rejected": 0.3340142071247101, + "logps/chosen": -285.12762451171875, + "logps/rejected": -312.48602294921875, + "loss": 0.6556, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.10041477531194687, + "rewards/margins": 0.1002071350812912, + "rewards/rejected": -0.20062191784381866, + "step": 4160 + }, + { + "epoch": 2.18, + "learning_rate": 1.0494443900153557e-07, + "logits/chosen": 0.3087230324745178, + "logits/rejected": 0.3323986530303955, + "logps/chosen": -290.55926513671875, + "logps/rejected": -333.17535400390625, + "loss": 0.6523, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.15257273614406586, + "rewards/margins": 0.070375956594944, + "rewards/rejected": -0.22294867038726807, + "step": 4170 + }, + { + "epoch": 2.19, + "learning_rate": 1.0370721763773507e-07, + "logits/chosen": 0.286038339138031, + "logits/rejected": 0.20693036913871765, + "logps/chosen": -388.9151306152344, + "logps/rejected": -281.81634521484375, + "loss": 0.6486, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1107790619134903, + "rewards/margins": 0.12731772661209106, + "rewards/rejected": -0.23809678852558136, + "step": 4180 + }, + { + "epoch": 2.19, + "learning_rate": 1.0247542115532845e-07, + "logits/chosen": 0.24497263133525848, + "logits/rejected": 0.208289235830307, + "logps/chosen": -376.7227478027344, + "logps/rejected": -310.5379638671875, + "loss": 0.6539, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.08648854494094849, + "rewards/margins": 0.13909420371055603, + "rewards/rejected": -0.22558274865150452, + "step": 4190 + }, + { + "epoch": 2.2, + "learning_rate": 1.0124909523223418e-07, + "logits/chosen": 0.2840736210346222, + "logits/rejected": 0.35097819566726685, + "logps/chosen": -294.68548583984375, + "logps/rejected": -337.6000061035156, + "loss": 0.6501, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.15106235444545746, + "rewards/margins": 0.058377016335725784, + "rewards/rejected": -0.20943935215473175, + "step": 4200 + }, + { + "epoch": 2.2, + "learning_rate": 1.0002828534350987e-07, + "logits/chosen": 0.28108084201812744, + "logits/rejected": 0.2525596618652344, + "logps/chosen": -321.88232421875, + "logps/rejected": -342.23681640625, + "loss": 0.6445, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10137365758419037, + "rewards/margins": 0.17096036672592163, + "rewards/rejected": -0.2723340094089508, + "step": 4210 + }, + { + "epoch": 2.21, + "learning_rate": 9.881303675966524e-08, + "logits/chosen": 0.17838601768016815, + "logits/rejected": 0.16133739054203033, + "logps/chosen": -415.6875, + "logps/rejected": -328.04986572265625, + "loss": 0.656, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.092302106320858, + "rewards/margins": 0.1187206357717514, + "rewards/rejected": -0.21102270483970642, + "step": 4220 + }, + { + "epoch": 2.21, + "learning_rate": 9.760339454498393e-08, + "logits/chosen": 0.281174898147583, + "logits/rejected": 0.2819034457206726, + "logps/chosen": -254.9493408203125, + "logps/rejected": -267.9808044433594, + "loss": 0.652, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10819119215011597, + "rewards/margins": 0.09994282573461533, + "rewards/rejected": -0.20813405513763428, + "step": 4230 + }, + { + "epoch": 2.22, + "learning_rate": 9.639940355585218e-08, + "logits/chosen": 0.2734231948852539, + "logits/rejected": 0.2930763363838196, + "logps/chosen": -333.05340576171875, + "logps/rejected": -247.87380981445312, + "loss": 0.6468, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10973703861236572, + "rewards/margins": 0.13471439480781555, + "rewards/rejected": -0.24445144832134247, + "step": 4240 + }, + { + "epoch": 2.22, + "learning_rate": 9.52011084390954e-08, + "logits/chosen": 0.3024441599845886, + "logits/rejected": 0.3216271996498108, + "logps/chosen": -372.4168395996094, + "logps/rejected": -402.4332275390625, + "loss": 0.6543, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.12555637955665588, + "rewards/margins": 0.10845603048801422, + "rewards/rejected": -0.2340123951435089, + "step": 4250 + }, + { + "epoch": 2.23, + "learning_rate": 9.400855363032262e-08, + "logits/chosen": 0.33374837040901184, + "logits/rejected": 0.31805044412612915, + "logps/chosen": -293.5007629394531, + "logps/rejected": -289.728515625, + "loss": 0.6391, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.1061309352517128, + "rewards/margins": 0.16277316212654114, + "rewards/rejected": -0.26890408992767334, + "step": 4260 + }, + { + "epoch": 2.23, + "learning_rate": 9.282178335227883e-08, + "logits/chosen": 0.2601068615913391, + "logits/rejected": 0.3062272071838379, + "logps/chosen": -265.8717956542969, + "logps/rejected": -276.9969177246094, + "loss": 0.6451, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16810034215450287, + "rewards/margins": 0.1246415227651596, + "rewards/rejected": -0.2927418649196625, + "step": 4270 + }, + { + "epoch": 2.24, + "learning_rate": 9.164084161320471e-08, + "logits/chosen": 0.30895930528640747, + "logits/rejected": 0.340026319026947, + "logps/chosen": -249.67147827148438, + "logps/rejected": -213.41751098632812, + "loss": 0.665, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11838191747665405, + "rewards/margins": 0.10098621994256973, + "rewards/rejected": -0.2193681299686432, + "step": 4280 + }, + { + "epoch": 2.24, + "learning_rate": 9.046577220520518e-08, + "logits/chosen": 0.27443909645080566, + "logits/rejected": 0.25630897283554077, + "logps/chosen": -335.86541748046875, + "logps/rejected": -314.80230712890625, + "loss": 0.6585, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1495860517024994, + "rewards/margins": 0.08008397370576859, + "rewards/rejected": -0.22967001795768738, + "step": 4290 + }, + { + "epoch": 2.25, + "learning_rate": 8.929661870262525e-08, + "logits/chosen": 0.2684154808521271, + "logits/rejected": 0.24717223644256592, + "logps/chosen": -264.81390380859375, + "logps/rejected": -209.41360473632812, + "loss": 0.644, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.15925763547420502, + "rewards/margins": 0.09572459757328033, + "rewards/rejected": -0.25498223304748535, + "step": 4300 + }, + { + "epoch": 2.26, + "learning_rate": 8.813342446043423e-08, + "logits/chosen": 0.24432964622974396, + "logits/rejected": 0.24687163531780243, + "logps/chosen": -261.1036071777344, + "logps/rejected": -251.00479125976562, + "loss": 0.6515, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.12970378994941711, + "rewards/margins": 0.0732387825846672, + "rewards/rejected": -0.20294256508350372, + "step": 4310 + }, + { + "epoch": 2.26, + "learning_rate": 8.697623261261788e-08, + "logits/chosen": 0.258579820394516, + "logits/rejected": 0.2788470387458801, + "logps/chosen": -312.0073547363281, + "logps/rejected": -281.97882080078125, + "loss": 0.659, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.10256769508123398, + "rewards/margins": 0.09709702432155609, + "rewards/rejected": -0.19966474175453186, + "step": 4320 + }, + { + "epoch": 2.27, + "learning_rate": 8.58250860705792e-08, + "logits/chosen": 0.3692210018634796, + "logits/rejected": 0.33054882287979126, + "logps/chosen": -364.50201416015625, + "logps/rejected": -311.98626708984375, + "loss": 0.638, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12589401006698608, + "rewards/margins": 0.0628051906824112, + "rewards/rejected": -0.18869920074939728, + "step": 4330 + }, + { + "epoch": 2.27, + "learning_rate": 8.468002752154671e-08, + "logits/chosen": 0.28316354751586914, + "logits/rejected": 0.32993918657302856, + "logps/chosen": -272.2777404785156, + "logps/rejected": -237.9681396484375, + "loss": 0.6455, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.15095901489257812, + "rewards/margins": 0.08649053424596786, + "rewards/rejected": -0.2374495565891266, + "step": 4340 + }, + { + "epoch": 2.28, + "learning_rate": 8.354109942699208e-08, + "logits/chosen": 0.2612248957157135, + "logits/rejected": 0.24447908997535706, + "logps/chosen": -295.1114807128906, + "logps/rejected": -305.5965881347656, + "loss": 0.6536, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.1192585676908493, + "rewards/margins": 0.10337366163730621, + "rewards/rejected": -0.2226322442293167, + "step": 4350 + }, + { + "epoch": 2.28, + "learning_rate": 8.240834402105524e-08, + "logits/chosen": 0.3573678135871887, + "logits/rejected": 0.28943532705307007, + "logps/chosen": -316.6932678222656, + "logps/rejected": -276.90496826171875, + "loss": 0.6581, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1998998373746872, + "rewards/margins": 0.03210686147212982, + "rewards/rejected": -0.23200669884681702, + "step": 4360 + }, + { + "epoch": 2.29, + "learning_rate": 8.128180330897791e-08, + "logits/chosen": 0.3203149437904358, + "logits/rejected": 0.2803335189819336, + "logps/chosen": -309.9126892089844, + "logps/rejected": -289.2821960449219, + "loss": 0.6531, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1578475385904312, + "rewards/margins": 0.09269069135189056, + "rewards/rejected": -0.2505382299423218, + "step": 4370 + }, + { + "epoch": 2.29, + "learning_rate": 8.016151906554683e-08, + "logits/chosen": 0.2752537131309509, + "logits/rejected": 0.27471452951431274, + "logps/chosen": -328.5307312011719, + "logps/rejected": -294.3258056640625, + "loss": 0.6516, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.21460950374603271, + "rewards/margins": 0.034757621586322784, + "rewards/rejected": -0.2493671178817749, + "step": 4380 + }, + { + "epoch": 2.3, + "learning_rate": 7.90475328335439e-08, + "logits/chosen": 0.2595667243003845, + "logits/rejected": 0.26930028200149536, + "logps/chosen": -326.0294494628906, + "logps/rejected": -378.00164794921875, + "loss": 0.6541, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.13298392295837402, + "rewards/margins": 0.12766322493553162, + "rewards/rejected": -0.26064714789390564, + "step": 4390 + }, + { + "epoch": 2.3, + "learning_rate": 7.793988592220568e-08, + "logits/chosen": 0.2611275315284729, + "logits/rejected": 0.22679242491722107, + "logps/chosen": -316.05780029296875, + "logps/rejected": -265.9347229003906, + "loss": 0.6459, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11774953454732895, + "rewards/margins": 0.12449419498443604, + "rewards/rejected": -0.24224373698234558, + "step": 4400 + }, + { + "epoch": 2.31, + "learning_rate": 7.683861940569217e-08, + "logits/chosen": 0.2962859272956848, + "logits/rejected": 0.23215405642986298, + "logps/chosen": -333.14239501953125, + "logps/rejected": -311.7344665527344, + "loss": 0.6497, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14002402126789093, + "rewards/margins": 0.11002373695373535, + "rewards/rejected": -0.2500477433204651, + "step": 4410 + }, + { + "epoch": 2.31, + "learning_rate": 7.574377412156291e-08, + "logits/chosen": 0.277981698513031, + "logits/rejected": 0.23015658557415009, + "logps/chosen": -309.0813903808594, + "logps/rejected": -304.1460266113281, + "loss": 0.6457, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14443780481815338, + "rewards/margins": 0.1270671784877777, + "rewards/rejected": -0.2715049982070923, + "step": 4420 + }, + { + "epoch": 2.32, + "learning_rate": 7.465539066926322e-08, + "logits/chosen": 0.21482165157794952, + "logits/rejected": 0.2153225690126419, + "logps/chosen": -393.70172119140625, + "logps/rejected": -346.408935546875, + "loss": 0.6383, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.15823566913604736, + "rewards/margins": 0.10698536783456802, + "rewards/rejected": -0.2652210295200348, + "step": 4430 + }, + { + "epoch": 2.32, + "learning_rate": 7.357350940861845e-08, + "logits/chosen": 0.26978224515914917, + "logits/rejected": 0.2641783356666565, + "logps/chosen": -373.73516845703125, + "logps/rejected": -330.3535461425781, + "loss": 0.6499, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.1348724067211151, + "rewards/margins": 0.08027593791484833, + "rewards/rejected": -0.21514835953712463, + "step": 4440 + }, + { + "epoch": 2.33, + "learning_rate": 7.249817045833726e-08, + "logits/chosen": 0.20601427555084229, + "logits/rejected": 0.1943071484565735, + "logps/chosen": -355.9278259277344, + "logps/rejected": -310.63531494140625, + "loss": 0.6486, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.19451366364955902, + "rewards/margins": 0.06895452737808228, + "rewards/rejected": -0.2634682059288025, + "step": 4450 + }, + { + "epoch": 2.33, + "learning_rate": 7.14294136945241e-08, + "logits/chosen": 0.3372945487499237, + "logits/rejected": 0.28835657238960266, + "logps/chosen": -328.9299621582031, + "logps/rejected": -289.6046142578125, + "loss": 0.6494, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.11903943866491318, + "rewards/margins": 0.13281255960464478, + "rewards/rejected": -0.25185197591781616, + "step": 4460 + }, + { + "epoch": 2.34, + "learning_rate": 7.036727874920043e-08, + "logits/chosen": 0.2705017328262329, + "logits/rejected": 0.27911967039108276, + "logps/chosen": -290.66314697265625, + "logps/rejected": -310.8837890625, + "loss": 0.6378, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.16096973419189453, + "rewards/margins": 0.10622663795948029, + "rewards/rejected": -0.267196387052536, + "step": 4470 + }, + { + "epoch": 2.34, + "learning_rate": 6.931180500883484e-08, + "logits/chosen": 0.1446174830198288, + "logits/rejected": 0.12518611550331116, + "logps/chosen": -413.4287109375, + "logps/rejected": -355.8533020019531, + "loss": 0.6494, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1334320604801178, + "rewards/margins": 0.12716497480869293, + "rewards/rejected": -0.2605970501899719, + "step": 4480 + }, + { + "epoch": 2.35, + "learning_rate": 6.826303161288302e-08, + "logits/chosen": 0.24317510426044464, + "logits/rejected": 0.22099463641643524, + "logps/chosen": -361.7783203125, + "logps/rejected": -312.4311218261719, + "loss": 0.6575, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.13776350021362305, + "rewards/margins": 0.14507371187210083, + "rewards/rejected": -0.2828372120857239, + "step": 4490 + }, + { + "epoch": 2.35, + "learning_rate": 6.722099745233594e-08, + "logits/chosen": 0.24744892120361328, + "logits/rejected": 0.25841349363327026, + "logps/chosen": -391.8916015625, + "logps/rejected": -308.3046569824219, + "loss": 0.6622, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13982141017913818, + "rewards/margins": 0.13293033838272095, + "rewards/rejected": -0.27275174856185913, + "step": 4500 + }, + { + "epoch": 2.36, + "learning_rate": 6.618574116827786e-08, + "logits/chosen": 0.20393629372119904, + "logits/rejected": 0.1978496015071869, + "logps/chosen": -324.3951721191406, + "logps/rejected": -361.393310546875, + "loss": 0.6595, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.14017212390899658, + "rewards/margins": 0.0554613396525383, + "rewards/rejected": -0.19563347101211548, + "step": 4510 + }, + { + "epoch": 2.37, + "learning_rate": 6.515730115045339e-08, + "logits/chosen": 0.2902334928512573, + "logits/rejected": 0.2500172257423401, + "logps/chosen": -343.683349609375, + "logps/rejected": -329.7452697753906, + "loss": 0.6488, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09550157934427261, + "rewards/margins": 0.15107180178165436, + "rewards/rejected": -0.24657340347766876, + "step": 4520 + }, + { + "epoch": 2.37, + "learning_rate": 6.413571553584399e-08, + "logits/chosen": 0.31443777680397034, + "logits/rejected": 0.298168420791626, + "logps/chosen": -301.1230163574219, + "logps/rejected": -361.3356018066406, + "loss": 0.6454, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14689506590366364, + "rewards/margins": 0.13481785356998444, + "rewards/rejected": -0.28171294927597046, + "step": 4530 + }, + { + "epoch": 2.38, + "learning_rate": 6.312102220725346e-08, + "logits/chosen": 0.17988334596157074, + "logits/rejected": 0.23794107139110565, + "logps/chosen": -280.0445251464844, + "logps/rejected": -317.3033142089844, + "loss": 0.6398, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.18284687399864197, + "rewards/margins": 0.08084186166524887, + "rewards/rejected": -0.26368874311447144, + "step": 4540 + }, + { + "epoch": 2.38, + "learning_rate": 6.21132587919036e-08, + "logits/chosen": 0.2522971034049988, + "logits/rejected": 0.26374301314353943, + "logps/chosen": -305.38629150390625, + "logps/rejected": -303.3993225097656, + "loss": 0.6562, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.16436639428138733, + "rewards/margins": 0.09498941898345947, + "rewards/rejected": -0.2593558430671692, + "step": 4550 + }, + { + "epoch": 2.39, + "learning_rate": 6.111246266003859e-08, + "logits/chosen": 0.29947465658187866, + "logits/rejected": 0.2651771008968353, + "logps/chosen": -356.18914794921875, + "logps/rejected": -301.4688720703125, + "loss": 0.644, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.1766948699951172, + "rewards/margins": 0.05812176316976547, + "rewards/rejected": -0.23481664061546326, + "step": 4560 + }, + { + "epoch": 2.39, + "learning_rate": 6.011867092353934e-08, + "logits/chosen": 0.23344139754772186, + "logits/rejected": 0.24661417305469513, + "logps/chosen": -288.53033447265625, + "logps/rejected": -285.0890808105469, + "loss": 0.6587, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1591729074716568, + "rewards/margins": 0.07025494426488876, + "rewards/rejected": -0.22942781448364258, + "step": 4570 + }, + { + "epoch": 2.4, + "learning_rate": 5.9131920434547235e-08, + "logits/chosen": 0.33210188150405884, + "logits/rejected": 0.36312466859817505, + "logps/chosen": -242.6085968017578, + "logps/rejected": -289.72479248046875, + "loss": 0.6515, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11827802658081055, + "rewards/margins": 0.12428691238164902, + "rewards/rejected": -0.24256494641304016, + "step": 4580 + }, + { + "epoch": 2.4, + "learning_rate": 5.8152247784097664e-08, + "logits/chosen": 0.24385061860084534, + "logits/rejected": 0.29127827286720276, + "logps/chosen": -299.4405822753906, + "logps/rejected": -340.1574401855469, + "loss": 0.6569, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.17524193227291107, + "rewards/margins": 0.07939636707305908, + "rewards/rejected": -0.25463834404945374, + "step": 4590 + }, + { + "epoch": 2.41, + "learning_rate": 5.717968930076289e-08, + "logits/chosen": 0.28695303201675415, + "logits/rejected": 0.2945484519004822, + "logps/chosen": -341.9978332519531, + "logps/rejected": -280.802001953125, + "loss": 0.653, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.13538150489330292, + "rewards/margins": 0.14087039232254028, + "rewards/rejected": -0.276251882314682, + "step": 4600 + }, + { + "epoch": 2.41, + "learning_rate": 5.621428104930528e-08, + "logits/chosen": 0.2341652363538742, + "logits/rejected": 0.2110733687877655, + "logps/chosen": -276.9383239746094, + "logps/rejected": -275.8529357910156, + "loss": 0.6554, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.17822220921516418, + "rewards/margins": 0.0678471028804779, + "rewards/rejected": -0.2460693120956421, + "step": 4610 + }, + { + "epoch": 2.42, + "learning_rate": 5.525605882933965e-08, + "logits/chosen": 0.32365158200263977, + "logits/rejected": 0.24177177250385284, + "logps/chosen": -342.0087890625, + "logps/rejected": -319.028564453125, + "loss": 0.6471, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.11949370801448822, + "rewards/margins": 0.1479637771844864, + "rewards/rejected": -0.2674574851989746, + "step": 4620 + }, + { + "epoch": 2.42, + "learning_rate": 5.4305058174005853e-08, + "logits/chosen": 0.3449974060058594, + "logits/rejected": 0.2847765386104584, + "logps/chosen": -297.4521179199219, + "logps/rejected": -292.80194091796875, + "loss": 0.6543, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.15853652358055115, + "rewards/margins": 0.03810811787843704, + "rewards/rejected": -0.1966446191072464, + "step": 4630 + }, + { + "epoch": 2.43, + "learning_rate": 5.33613143486511e-08, + "logits/chosen": 0.3558960258960724, + "logits/rejected": 0.372741162776947, + "logps/chosen": -263.94915771484375, + "logps/rejected": -239.63687133789062, + "loss": 0.6526, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1764945238828659, + "rewards/margins": 0.09427468478679657, + "rewards/rejected": -0.27076923847198486, + "step": 4640 + }, + { + "epoch": 2.43, + "learning_rate": 5.242486234952206e-08, + "logits/chosen": 0.2200225591659546, + "logits/rejected": 0.2050172984600067, + "logps/chosen": -323.3846130371094, + "logps/rejected": -237.43331909179688, + "loss": 0.6561, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.16000418365001678, + "rewards/margins": 0.11692575365304947, + "rewards/rejected": -0.27692991495132446, + "step": 4650 + }, + { + "epoch": 2.44, + "learning_rate": 5.149573690246758e-08, + "logits/chosen": 0.25134509801864624, + "logits/rejected": 0.3196043074131012, + "logps/chosen": -328.13555908203125, + "logps/rejected": -286.40325927734375, + "loss": 0.6535, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.17728720605373383, + "rewards/margins": 0.04581373184919357, + "rewards/rejected": -0.2231009304523468, + "step": 4660 + }, + { + "epoch": 2.44, + "learning_rate": 5.057397246165052e-08, + "logits/chosen": 0.24966394901275635, + "logits/rejected": 0.2608310580253601, + "logps/chosen": -335.91424560546875, + "logps/rejected": -341.7672119140625, + "loss": 0.6554, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.13174419105052948, + "rewards/margins": 0.08406993746757507, + "rewards/rejected": -0.21581411361694336, + "step": 4670 + }, + { + "epoch": 2.45, + "learning_rate": 4.9659603208270173e-08, + "logits/chosen": 0.20629934966564178, + "logits/rejected": 0.19532974064350128, + "logps/chosen": -393.67547607421875, + "logps/rejected": -367.861083984375, + "loss": 0.6388, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.07362738996744156, + "rewards/margins": 0.16155405342578888, + "rewards/rejected": -0.23518145084381104, + "step": 4680 + }, + { + "epoch": 2.45, + "learning_rate": 4.875266304929496e-08, + "logits/chosen": 0.28497210144996643, + "logits/rejected": 0.2833143472671509, + "logps/chosen": -315.1612854003906, + "logps/rejected": -271.2629089355469, + "loss": 0.6534, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.17749536037445068, + "rewards/margins": 0.0488092266023159, + "rewards/rejected": -0.2263045758008957, + "step": 4690 + }, + { + "epoch": 2.46, + "learning_rate": 4.785318561620511e-08, + "logits/chosen": 0.218004509806633, + "logits/rejected": 0.24838721752166748, + "logps/chosen": -409.41473388671875, + "logps/rejected": -329.8982238769531, + "loss": 0.6505, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.09751011431217194, + "rewards/margins": 0.14528706669807434, + "rewards/rejected": -0.24279718101024628, + "step": 4700 + }, + { + "epoch": 2.46, + "learning_rate": 4.696120426374503e-08, + "logits/chosen": 0.3327026069164276, + "logits/rejected": 0.30656957626342773, + "logps/chosen": -311.63763427734375, + "logps/rejected": -304.68841552734375, + "loss": 0.6527, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.16270048916339874, + "rewards/margins": 0.0921703428030014, + "rewards/rejected": -0.25487083196640015, + "step": 4710 + }, + { + "epoch": 2.47, + "learning_rate": 4.607675206868705e-08, + "logits/chosen": 0.22925233840942383, + "logits/rejected": 0.2312936782836914, + "logps/chosen": -353.39422607421875, + "logps/rejected": -296.1007995605469, + "loss": 0.6482, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.12219414860010147, + "rewards/margins": 0.14850768446922302, + "rewards/rejected": -0.2707018256187439, + "step": 4720 + }, + { + "epoch": 2.48, + "learning_rate": 4.519986182860452e-08, + "logits/chosen": 0.193797767162323, + "logits/rejected": 0.2653660774230957, + "logps/chosen": -295.244873046875, + "logps/rejected": -267.8289489746094, + "loss": 0.6456, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.1571170836687088, + "rewards/margins": 0.13725200295448303, + "rewards/rejected": -0.29436904191970825, + "step": 4730 + }, + { + "epoch": 2.48, + "learning_rate": 4.433056606065552e-08, + "logits/chosen": 0.20690850913524628, + "logits/rejected": 0.22397968173027039, + "logps/chosen": -328.7765808105469, + "logps/rejected": -320.0532531738281, + "loss": 0.6572, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1482156366109848, + "rewards/margins": 0.09247883409261703, + "rewards/rejected": -0.24069447815418243, + "step": 4740 + }, + { + "epoch": 2.49, + "learning_rate": 4.3468897000377427e-08, + "logits/chosen": 0.2184235155582428, + "logits/rejected": 0.26873156428337097, + "logps/chosen": -253.054931640625, + "logps/rejected": -231.57522583007812, + "loss": 0.6571, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1425979882478714, + "rewards/margins": 0.0733003169298172, + "rewards/rejected": -0.2158983051776886, + "step": 4750 + }, + { + "epoch": 2.49, + "learning_rate": 4.2614886600491115e-08, + "logits/chosen": 0.19274510443210602, + "logits/rejected": 0.13967491686344147, + "logps/chosen": -299.489013671875, + "logps/rejected": -248.2526092529297, + "loss": 0.6506, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1602405607700348, + "rewards/margins": 0.08521406352519989, + "rewards/rejected": -0.24545462429523468, + "step": 4760 + }, + { + "epoch": 2.5, + "learning_rate": 4.1768566529716415e-08, + "logits/chosen": 0.258291095495224, + "logits/rejected": 0.2351008951663971, + "logps/chosen": -297.2149353027344, + "logps/rejected": -235.2070770263672, + "loss": 0.6552, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.14642244577407837, + "rewards/margins": 0.11697183549404144, + "rewards/rejected": -0.263394296169281, + "step": 4770 + }, + { + "epoch": 2.5, + "learning_rate": 4.0929968171597526e-08, + "logits/chosen": 0.30216288566589355, + "logits/rejected": 0.2607000768184662, + "logps/chosen": -351.5521545410156, + "logps/rejected": -286.664794921875, + "loss": 0.6603, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.13644321262836456, + "rewards/margins": 0.135478213429451, + "rewards/rejected": -0.27192145586013794, + "step": 4780 + }, + { + "epoch": 2.51, + "learning_rate": 4.009912262333942e-08, + "logits/chosen": 0.266966849565506, + "logits/rejected": 0.27799034118652344, + "logps/chosen": -301.3504638671875, + "logps/rejected": -274.5638427734375, + "loss": 0.6491, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.15286056697368622, + "rewards/margins": 0.0682164877653122, + "rewards/rejected": -0.2210770845413208, + "step": 4790 + }, + { + "epoch": 2.51, + "learning_rate": 3.927606069465442e-08, + "logits/chosen": 0.25001880526542664, + "logits/rejected": 0.2829858660697937, + "logps/chosen": -288.3041076660156, + "logps/rejected": -270.7674560546875, + "loss": 0.6472, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11720645427703857, + "rewards/margins": 0.1586742252111435, + "rewards/rejected": -0.27588069438934326, + "step": 4800 + }, + { + "epoch": 2.52, + "learning_rate": 3.8460812906620037e-08, + "logits/chosen": 0.28133073449134827, + "logits/rejected": 0.225880429148674, + "logps/chosen": -277.67620849609375, + "logps/rejected": -286.3600158691406, + "loss": 0.6493, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1528925597667694, + "rewards/margins": 0.10731463134288788, + "rewards/rejected": -0.2602071762084961, + "step": 4810 + }, + { + "epoch": 2.52, + "learning_rate": 3.765340949054696e-08, + "logits/chosen": 0.28822416067123413, + "logits/rejected": 0.2694561183452606, + "logps/chosen": -341.54034423828125, + "logps/rejected": -300.6506042480469, + "loss": 0.6472, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.14739778637886047, + "rewards/margins": 0.13927289843559265, + "rewards/rejected": -0.2866706848144531, + "step": 4820 + }, + { + "epoch": 2.53, + "learning_rate": 3.685388038685811e-08, + "logits/chosen": 0.25113362073898315, + "logits/rejected": 0.20978930592536926, + "logps/chosen": -391.368896484375, + "logps/rejected": -324.4923400878906, + "loss": 0.6436, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.14761051535606384, + "rewards/margins": 0.11779048293828964, + "rewards/rejected": -0.2654009759426117, + "step": 4830 + }, + { + "epoch": 2.53, + "learning_rate": 3.60622552439783e-08, + "logits/chosen": 0.30557817220687866, + "logits/rejected": 0.28471964597702026, + "logps/chosen": -309.66851806640625, + "logps/rejected": -264.75189208984375, + "loss": 0.6425, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.15169253945350647, + "rewards/margins": 0.07038528472185135, + "rewards/rejected": -0.22207781672477722, + "step": 4840 + }, + { + "epoch": 2.54, + "learning_rate": 3.527856341723479e-08, + "logits/chosen": 0.3501953184604645, + "logits/rejected": 0.36876240372657776, + "logps/chosen": -275.45587158203125, + "logps/rejected": -287.1562194824219, + "loss": 0.6401, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1547791212797165, + "rewards/margins": 0.11520648002624512, + "rewards/rejected": -0.2699856162071228, + "step": 4850 + }, + { + "epoch": 2.54, + "learning_rate": 3.4502833967768816e-08, + "logits/chosen": 0.260027140378952, + "logits/rejected": 0.2362249791622162, + "logps/chosen": -355.7345886230469, + "logps/rejected": -350.7608642578125, + "loss": 0.655, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.13119223713874817, + "rewards/margins": 0.1489580124616623, + "rewards/rejected": -0.28015023469924927, + "step": 4860 + }, + { + "epoch": 2.55, + "learning_rate": 3.373509566145793e-08, + "logits/chosen": 0.30141669511795044, + "logits/rejected": 0.26752427220344543, + "logps/chosen": -323.4403991699219, + "logps/rejected": -272.54815673828125, + "loss": 0.6551, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.15655481815338135, + "rewards/margins": 0.11246142536401749, + "rewards/rejected": -0.26901623606681824, + "step": 4870 + }, + { + "epoch": 2.55, + "learning_rate": 3.2975376967849104e-08, + "logits/chosen": 0.3648565411567688, + "logits/rejected": 0.3760683536529541, + "logps/chosen": -275.06890869140625, + "logps/rejected": -216.0530548095703, + "loss": 0.6525, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.13468894362449646, + "rewards/margins": 0.09779195487499237, + "rewards/rejected": -0.23248091340065002, + "step": 4880 + }, + { + "epoch": 2.56, + "learning_rate": 3.222370605910332e-08, + "logits/chosen": 0.2788364887237549, + "logits/rejected": 0.2764519155025482, + "logps/chosen": -305.3879699707031, + "logps/rejected": -265.95086669921875, + "loss": 0.6438, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1286066770553589, + "rewards/margins": 0.139145165681839, + "rewards/rejected": -0.26775187253952026, + "step": 4890 + }, + { + "epoch": 2.56, + "learning_rate": 3.1480110808950746e-08, + "logits/chosen": 0.21629850566387177, + "logits/rejected": 0.2902226150035858, + "logps/chosen": -381.25604248046875, + "logps/rejected": -385.64727783203125, + "loss": 0.6333, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.09950147569179535, + "rewards/margins": 0.1605621874332428, + "rewards/rejected": -0.26006367802619934, + "step": 4900 + }, + { + "epoch": 2.57, + "learning_rate": 3.07446187916568e-08, + "logits/chosen": 0.2720637321472168, + "logits/rejected": 0.3146423399448395, + "logps/chosen": -367.902587890625, + "logps/rejected": -328.25006103515625, + "loss": 0.6523, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.138359934091568, + "rewards/margins": 0.11164422333240509, + "rewards/rejected": -0.2500041723251343, + "step": 4910 + }, + { + "epoch": 2.57, + "learning_rate": 3.001725728100021e-08, + "logits/chosen": 0.16783829033374786, + "logits/rejected": 0.21476595103740692, + "logps/chosen": -323.3216857910156, + "logps/rejected": -346.15203857421875, + "loss": 0.6429, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1808640956878662, + "rewards/margins": 0.06437064707279205, + "rewards/rejected": -0.24523475766181946, + "step": 4920 + }, + { + "epoch": 2.58, + "learning_rate": 2.9298053249261238e-08, + "logits/chosen": 0.21411249041557312, + "logits/rejected": 0.2068692445755005, + "logps/chosen": -301.1133728027344, + "logps/rejected": -324.4837646484375, + "loss": 0.6585, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.18682220578193665, + "rewards/margins": 0.061571698635816574, + "rewards/rejected": -0.24839389324188232, + "step": 4930 + }, + { + "epoch": 2.59, + "learning_rate": 2.8587033366221534e-08, + "logits/chosen": 0.31188955903053284, + "logits/rejected": 0.3038763999938965, + "logps/chosen": -335.08197021484375, + "logps/rejected": -356.7730712890625, + "loss": 0.6412, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.13679111003875732, + "rewards/margins": 0.11448071151971817, + "rewards/rejected": -0.2512718141078949, + "step": 4940 + }, + { + "epoch": 2.59, + "learning_rate": 2.7884223998175248e-08, + "logits/chosen": 0.316620409488678, + "logits/rejected": 0.3149321675300598, + "logps/chosen": -250.12158203125, + "logps/rejected": -260.7198486328125, + "loss": 0.6407, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.18968793749809265, + "rewards/margins": 0.04125380888581276, + "rewards/rejected": -0.2309417724609375, + "step": 4950 + }, + { + "epoch": 2.6, + "learning_rate": 2.718965120695141e-08, + "logits/chosen": 0.21362006664276123, + "logits/rejected": 0.24621066451072693, + "logps/chosen": -362.2911682128906, + "logps/rejected": -329.33746337890625, + "loss": 0.6531, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1833564043045044, + "rewards/margins": 0.039215873926877975, + "rewards/rejected": -0.22257229685783386, + "step": 4960 + }, + { + "epoch": 2.6, + "learning_rate": 2.6503340748947083e-08, + "logits/chosen": 0.30787166953086853, + "logits/rejected": 0.2651337683200836, + "logps/chosen": -365.96856689453125, + "logps/rejected": -298.203369140625, + "loss": 0.6515, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08608182519674301, + "rewards/margins": 0.17453762888908386, + "rewards/rejected": -0.26061946153640747, + "step": 4970 + }, + { + "epoch": 2.61, + "learning_rate": 2.5825318074172763e-08, + "logits/chosen": 0.3091648817062378, + "logits/rejected": 0.3526086211204529, + "logps/chosen": -330.2383117675781, + "logps/rejected": -311.9805908203125, + "loss": 0.6627, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.18691927194595337, + "rewards/margins": 0.04171084240078926, + "rewards/rejected": -0.22863011062145233, + "step": 4980 + }, + { + "epoch": 2.61, + "learning_rate": 2.5155608325308358e-08, + "logits/chosen": 0.3669896423816681, + "logits/rejected": 0.34136396646499634, + "logps/chosen": -289.3947448730469, + "logps/rejected": -231.5489044189453, + "loss": 0.6519, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.15817251801490784, + "rewards/margins": 0.06915486603975296, + "rewards/rejected": -0.2273273766040802, + "step": 4990 + }, + { + "epoch": 2.62, + "learning_rate": 2.4494236336770695e-08, + "logits/chosen": 0.23264248669147491, + "logits/rejected": 0.28665319085121155, + "logps/chosen": -327.484375, + "logps/rejected": -304.4945983886719, + "loss": 0.6509, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10859771817922592, + "rewards/margins": 0.13269570469856262, + "rewards/rejected": -0.24129343032836914, + "step": 5000 + }, + { + "epoch": 2.62, + "learning_rate": 2.3841226633792983e-08, + "logits/chosen": 0.38944217562675476, + "logits/rejected": 0.4039441645145416, + "logps/chosen": -265.03985595703125, + "logps/rejected": -283.97796630859375, + "loss": 0.6588, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.17852087318897247, + "rewards/margins": 0.06068998575210571, + "rewards/rejected": -0.23921087384223938, + "step": 5010 + }, + { + "epoch": 2.63, + "learning_rate": 2.319660343151511e-08, + "logits/chosen": 0.22561486065387726, + "logits/rejected": 0.269603431224823, + "logps/chosen": -277.7508850097656, + "logps/rejected": -330.13018798828125, + "loss": 0.6513, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.16024520993232727, + "rewards/margins": 0.09627407789230347, + "rewards/rejected": -0.25651925802230835, + "step": 5020 + }, + { + "epoch": 2.63, + "learning_rate": 2.2560390634085715e-08, + "logits/chosen": 0.2743435800075531, + "logits/rejected": 0.2494051158428192, + "logps/chosen": -335.19024658203125, + "logps/rejected": -276.3586730957031, + "loss": 0.6423, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1546277403831482, + "rewards/margins": 0.1273549497127533, + "rewards/rejected": -0.2819827198982239, + "step": 5030 + }, + { + "epoch": 2.64, + "learning_rate": 2.1932611833775843e-08, + "logits/chosen": 0.31680962443351746, + "logits/rejected": 0.3296371102333069, + "logps/chosen": -241.97036743164062, + "logps/rejected": -226.87362670898438, + "loss": 0.6501, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.15829713642597198, + "rewards/margins": 0.05115853622555733, + "rewards/rejected": -0.2094556987285614, + "step": 5040 + }, + { + "epoch": 2.64, + "learning_rate": 2.1313290310103897e-08, + "logits/chosen": 0.25922948122024536, + "logits/rejected": 0.27459073066711426, + "logps/chosen": -303.64093017578125, + "logps/rejected": -268.06903076171875, + "loss": 0.6481, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.14507296681404114, + "rewards/margins": 0.05711113288998604, + "rewards/rejected": -0.20218412578105927, + "step": 5050 + }, + { + "epoch": 2.65, + "learning_rate": 2.0702449028972696e-08, + "logits/chosen": 0.2910730540752411, + "logits/rejected": 0.21585910022258759, + "logps/chosen": -404.71173095703125, + "logps/rejected": -333.95281982421875, + "loss": 0.6444, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.1354363113641739, + "rewards/margins": 0.13795578479766846, + "rewards/rejected": -0.2733921408653259, + "step": 5060 + }, + { + "epoch": 2.65, + "learning_rate": 2.0100110641817547e-08, + "logits/chosen": 0.35233062505722046, + "logits/rejected": 0.35314399003982544, + "logps/chosen": -290.5165100097656, + "logps/rejected": -295.0611267089844, + "loss": 0.6561, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.15800753235816956, + "rewards/margins": 0.08591978251934052, + "rewards/rejected": -0.24392731487751007, + "step": 5070 + }, + { + "epoch": 2.66, + "learning_rate": 1.9506297484766427e-08, + "logits/chosen": 0.2969042658805847, + "logits/rejected": 0.24884216487407684, + "logps/chosen": -309.47406005859375, + "logps/rejected": -283.39501953125, + "loss": 0.6405, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.16573520004749298, + "rewards/margins": 0.12126419693231583, + "rewards/rejected": -0.2869994044303894, + "step": 5080 + }, + { + "epoch": 2.66, + "learning_rate": 1.8921031577811692e-08, + "logits/chosen": 0.30634480714797974, + "logits/rejected": 0.23022684454917908, + "logps/chosen": -341.3998107910156, + "logps/rejected": -308.49285888671875, + "loss": 0.6408, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.17996549606323242, + "rewards/margins": 0.09575355052947998, + "rewards/rejected": -0.2757190465927124, + "step": 5090 + }, + { + "epoch": 2.67, + "learning_rate": 1.834433462399351e-08, + "logits/chosen": 0.23569568991661072, + "logits/rejected": 0.2792285084724426, + "logps/chosen": -388.8201599121094, + "logps/rejected": -319.556884765625, + "loss": 0.651, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.08855469524860382, + "rewards/margins": 0.13768477737903595, + "rewards/rejected": -0.22623948752880096, + "step": 5100 + }, + { + "epoch": 2.67, + "learning_rate": 1.7776228008594962e-08, + "logits/chosen": 0.266242653131485, + "logits/rejected": 0.2676263749599457, + "logps/chosen": -301.49359130859375, + "logps/rejected": -362.7807312011719, + "loss": 0.6505, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.17288120090961456, + "rewards/margins": 0.12345176935195923, + "rewards/rejected": -0.2963329553604126, + "step": 5110 + }, + { + "epoch": 2.68, + "learning_rate": 1.721673279834926e-08, + "logits/chosen": 0.2719994783401489, + "logits/rejected": 0.22567155957221985, + "logps/chosen": -358.1050720214844, + "logps/rejected": -335.2202453613281, + "loss": 0.6548, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1040639653801918, + "rewards/margins": 0.1355864554643631, + "rewards/rejected": -0.2396504133939743, + "step": 5120 + }, + { + "epoch": 2.68, + "learning_rate": 1.666586974065831e-08, + "logits/chosen": 0.19955766201019287, + "logits/rejected": 0.22099065780639648, + "logps/chosen": -400.0057067871094, + "logps/rejected": -326.6587829589844, + "loss": 0.6457, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1376148909330368, + "rewards/margins": 0.14305511116981506, + "rewards/rejected": -0.28067004680633545, + "step": 5130 + }, + { + "epoch": 2.69, + "learning_rate": 1.6123659262823497e-08, + "logits/chosen": 0.2476876676082611, + "logits/rejected": 0.31213703751564026, + "logps/chosen": -334.13592529296875, + "logps/rejected": -341.7435607910156, + "loss": 0.6548, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1919669210910797, + "rewards/margins": 0.05596815422177315, + "rewards/rejected": -0.24793505668640137, + "step": 5140 + }, + { + "epoch": 2.69, + "learning_rate": 1.5590121471288104e-08, + "logits/chosen": 0.17362567782402039, + "logits/rejected": 0.22493436932563782, + "logps/chosen": -305.18109130859375, + "logps/rejected": -284.05657958984375, + "loss": 0.6466, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.18385998904705048, + "rewards/margins": 0.07839556038379669, + "rewards/rejected": -0.2622555196285248, + "step": 5150 + }, + { + "epoch": 2.7, + "learning_rate": 1.5065276150891787e-08, + "logits/chosen": 0.33663293719291687, + "logits/rejected": 0.34182173013687134, + "logps/chosen": -353.5645751953125, + "logps/rejected": -282.2459716796875, + "loss": 0.6498, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.14286093413829803, + "rewards/margins": 0.061833806335926056, + "rewards/rejected": -0.2046947181224823, + "step": 5160 + }, + { + "epoch": 2.71, + "learning_rate": 1.4549142764136768e-08, + "logits/chosen": 0.19153036177158356, + "logits/rejected": 0.20434775948524475, + "logps/chosen": -255.4372100830078, + "logps/rejected": -241.89028930664062, + "loss": 0.6467, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.19665054976940155, + "rewards/margins": 0.032036345452070236, + "rewards/rejected": -0.22868689894676208, + "step": 5170 + }, + { + "epoch": 2.71, + "learning_rate": 1.4041740450466383e-08, + "logits/chosen": 0.21942445635795593, + "logits/rejected": 0.26775887608528137, + "logps/chosen": -307.8087463378906, + "logps/rejected": -300.2779541015625, + "loss": 0.6502, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.14142893254756927, + "rewards/margins": 0.1599656045436859, + "rewards/rejected": -0.3013945519924164, + "step": 5180 + }, + { + "epoch": 2.72, + "learning_rate": 1.3543088025555094e-08, + "logits/chosen": 0.26968908309936523, + "logits/rejected": 0.3253975510597229, + "logps/chosen": -323.02215576171875, + "logps/rejected": -304.500732421875, + "loss": 0.643, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.16611634194850922, + "rewards/margins": 0.05251390486955643, + "rewards/rejected": -0.21863026916980743, + "step": 5190 + }, + { + "epoch": 2.72, + "learning_rate": 1.3053203980610744e-08, + "logits/chosen": 0.2522396445274353, + "logits/rejected": 0.27969008684158325, + "logps/chosen": -273.38287353515625, + "logps/rejected": -268.48370361328125, + "loss": 0.6467, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.18352146446704865, + "rewards/margins": 0.06839027255773544, + "rewards/rejected": -0.2519117295742035, + "step": 5200 + }, + { + "epoch": 2.73, + "learning_rate": 1.2572106481689243e-08, + "logits/chosen": 0.28615397214889526, + "logits/rejected": 0.2679155170917511, + "logps/chosen": -298.0022277832031, + "logps/rejected": -299.09765625, + "loss": 0.6431, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.16940812766551971, + "rewards/margins": 0.08789737522602081, + "rewards/rejected": -0.25730547308921814, + "step": 5210 + }, + { + "epoch": 2.73, + "learning_rate": 1.2099813369020467e-08, + "logits/chosen": 0.31619611382484436, + "logits/rejected": 0.32709795236587524, + "logps/chosen": -288.56951904296875, + "logps/rejected": -293.2323303222656, + "loss": 0.6493, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.17724359035491943, + "rewards/margins": 0.07778071612119675, + "rewards/rejected": -0.2550243139266968, + "step": 5220 + }, + { + "epoch": 2.74, + "learning_rate": 1.1636342156346846e-08, + "logits/chosen": 0.2666959762573242, + "logits/rejected": 0.2191891372203827, + "logps/chosen": -348.546630859375, + "logps/rejected": -304.4643859863281, + "loss": 0.637, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12001262605190277, + "rewards/margins": 0.17269203066825867, + "rewards/rejected": -0.29270467162132263, + "step": 5230 + }, + { + "epoch": 2.74, + "learning_rate": 1.1181710030274043e-08, + "logits/chosen": 0.2580859065055847, + "logits/rejected": 0.19054308533668518, + "logps/chosen": -390.20050048828125, + "logps/rejected": -336.88482666015625, + "loss": 0.6508, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.15105481445789337, + "rewards/margins": 0.10737206786870956, + "rewards/rejected": -0.25842687487602234, + "step": 5240 + }, + { + "epoch": 2.75, + "learning_rate": 1.0735933849633561e-08, + "logits/chosen": 0.26194819808006287, + "logits/rejected": 0.285000741481781, + "logps/chosen": -263.4084777832031, + "logps/rejected": -237.882568359375, + "loss": 0.6546, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.17627274990081787, + "rewards/margins": 0.08483530580997467, + "rewards/rejected": -0.26110807061195374, + "step": 5250 + }, + { + "epoch": 2.75, + "learning_rate": 1.0299030144857445e-08, + "logits/chosen": 0.2571147680282593, + "logits/rejected": 0.24691708385944366, + "logps/chosen": -366.71435546875, + "logps/rejected": -301.5981140136719, + "loss": 0.6364, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10573141276836395, + "rewards/margins": 0.1143946647644043, + "rewards/rejected": -0.22012607753276825, + "step": 5260 + }, + { + "epoch": 2.76, + "learning_rate": 9.871015117365516e-09, + "logits/chosen": 0.2549286484718323, + "logits/rejected": 0.22352001070976257, + "logps/chosen": -308.32208251953125, + "logps/rejected": -247.04324340820312, + "loss": 0.6534, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.13238653540611267, + "rewards/margins": 0.17268718779087067, + "rewards/rejected": -0.30507367849349976, + "step": 5270 + }, + { + "epoch": 2.76, + "learning_rate": 9.451904638964447e-09, + "logits/chosen": 0.24427881836891174, + "logits/rejected": 0.2131018191576004, + "logps/chosen": -360.16229248046875, + "logps/rejected": -383.1683349609375, + "loss": 0.6616, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1408374309539795, + "rewards/margins": 0.0509166419506073, + "rewards/rejected": -0.1917540729045868, + "step": 5280 + }, + { + "epoch": 2.77, + "learning_rate": 9.041714251259214e-09, + "logits/chosen": 0.2528178095817566, + "logits/rejected": 0.23741415143013, + "logps/chosen": -299.9393005371094, + "logps/rejected": -269.7351989746094, + "loss": 0.6448, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.16982091963291168, + "rewards/margins": 0.10371136665344238, + "rewards/rejected": -0.27353227138519287, + "step": 5290 + }, + { + "epoch": 2.77, + "learning_rate": 8.640459165076857e-09, + "logits/chosen": 0.15313532948493958, + "logits/rejected": 0.23000986874103546, + "logps/chosen": -320.9138488769531, + "logps/rejected": -266.3368835449219, + "loss": 0.6559, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1354338675737381, + "rewards/margins": 0.07715844362974167, + "rewards/rejected": -0.21259228885173798, + "step": 5300 + }, + { + "epoch": 2.78, + "learning_rate": 8.248154259902246e-09, + "logits/chosen": 0.3111906051635742, + "logits/rejected": 0.30129846930503845, + "logps/chosen": -243.94140625, + "logps/rejected": -262.7614440917969, + "loss": 0.6409, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1589013636112213, + "rewards/margins": 0.06876397132873535, + "rewards/rejected": -0.22766533493995667, + "step": 5310 + }, + { + "epoch": 2.78, + "learning_rate": 7.86481408332651e-09, + "logits/chosen": 0.23351125419139862, + "logits/rejected": 0.248075932264328, + "logps/chosen": -362.76446533203125, + "logps/rejected": -319.3492736816406, + "loss": 0.6513, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10988447815179825, + "rewards/margins": 0.1581057608127594, + "rewards/rejected": -0.26799023151397705, + "step": 5320 + }, + { + "epoch": 2.79, + "learning_rate": 7.490452850507506e-09, + "logits/chosen": 0.25614452362060547, + "logits/rejected": 0.22783274948596954, + "logps/chosen": -309.2509460449219, + "logps/rejected": -323.1338806152344, + "loss": 0.6487, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.15227383375167847, + "rewards/margins": 0.11466242372989655, + "rewards/rejected": -0.2669362425804138, + "step": 5330 + }, + { + "epoch": 2.79, + "learning_rate": 7.1250844436426535e-09, + "logits/chosen": 0.23697538673877716, + "logits/rejected": 0.3347683548927307, + "logps/chosen": -290.0230407714844, + "logps/rejected": -326.98626708984375, + "loss": 0.6518, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.17432980239391327, + "rewards/margins": 0.059077221900224686, + "rewards/rejected": -0.23340705037117004, + "step": 5340 + }, + { + "epoch": 2.8, + "learning_rate": 6.768722411454153e-09, + "logits/chosen": 0.3221455216407776, + "logits/rejected": 0.3266808092594147, + "logps/chosen": -335.4026794433594, + "logps/rejected": -314.0110778808594, + "loss": 0.6511, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.15195028483867645, + "rewards/margins": 0.08262494951486588, + "rewards/rejected": -0.23457522690296173, + "step": 5350 + }, + { + "epoch": 2.8, + "learning_rate": 6.421379968686663e-09, + "logits/chosen": 0.27548637986183167, + "logits/rejected": 0.2258666306734085, + "logps/chosen": -308.8509826660156, + "logps/rejected": -300.86566162109375, + "loss": 0.6423, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1670268476009369, + "rewards/margins": 0.09721226990222931, + "rewards/rejected": -0.2642391324043274, + "step": 5360 + }, + { + "epoch": 2.81, + "learning_rate": 6.083069995617113e-09, + "logits/chosen": 0.3081795573234558, + "logits/rejected": 0.339841365814209, + "logps/chosen": -307.8616027832031, + "logps/rejected": -261.6089172363281, + "loss": 0.6438, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.12482471764087677, + "rewards/margins": 0.1607241928577423, + "rewards/rejected": -0.28554895520210266, + "step": 5370 + }, + { + "epoch": 2.82, + "learning_rate": 5.753805037577192e-09, + "logits/chosen": 0.22058221697807312, + "logits/rejected": 0.23570296168327332, + "logps/chosen": -345.9975891113281, + "logps/rejected": -292.41510009765625, + "loss": 0.6427, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.11676367372274399, + "rewards/margins": 0.13583344221115112, + "rewards/rejected": -0.2525970935821533, + "step": 5380 + }, + { + "epoch": 2.82, + "learning_rate": 5.433597304488113e-09, + "logits/chosen": 0.30112963914871216, + "logits/rejected": 0.2849612832069397, + "logps/chosen": -310.7317810058594, + "logps/rejected": -260.43902587890625, + "loss": 0.6522, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.15954820811748505, + "rewards/margins": 0.08634625375270844, + "rewards/rejected": -0.24589447677135468, + "step": 5390 + }, + { + "epoch": 2.83, + "learning_rate": 5.122458670407836e-09, + "logits/chosen": 0.25500181317329407, + "logits/rejected": 0.26972508430480957, + "logps/chosen": -353.02105712890625, + "logps/rejected": -318.2257385253906, + "loss": 0.6532, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10740556567907333, + "rewards/margins": 0.1574164181947708, + "rewards/rejected": -0.26482197642326355, + "step": 5400 + }, + { + "epoch": 2.83, + "learning_rate": 4.820400673090669e-09, + "logits/chosen": 0.235460564494133, + "logits/rejected": 0.25399118661880493, + "logps/chosen": -298.4765625, + "logps/rejected": -291.35833740234375, + "loss": 0.6464, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.19252967834472656, + "rewards/margins": 0.07367880642414093, + "rewards/rejected": -0.2662084698677063, + "step": 5410 + }, + { + "epoch": 2.84, + "learning_rate": 4.5274345135595525e-09, + "logits/chosen": 0.20221033692359924, + "logits/rejected": 0.22863800823688507, + "logps/chosen": -316.8031921386719, + "logps/rejected": -259.7807312011719, + "loss": 0.652, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.18545842170715332, + "rewards/margins": 0.08575184643268585, + "rewards/rejected": -0.271210253238678, + "step": 5420 + }, + { + "epoch": 2.84, + "learning_rate": 4.243571055690648e-09, + "logits/chosen": 0.3070654273033142, + "logits/rejected": 0.33014267683029175, + "logps/chosen": -316.16876220703125, + "logps/rejected": -275.2470703125, + "loss": 0.6541, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1588907241821289, + "rewards/margins": 0.10213694721460342, + "rewards/rejected": -0.26102766394615173, + "step": 5430 + }, + { + "epoch": 2.85, + "learning_rate": 3.968820825810431e-09, + "logits/chosen": 0.204188734292984, + "logits/rejected": 0.23602977395057678, + "logps/chosen": -255.2363739013672, + "logps/rejected": -281.852783203125, + "loss": 0.653, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.17094658315181732, + "rewards/margins": 0.0320189967751503, + "rewards/rejected": -0.20296558737754822, + "step": 5440 + }, + { + "epoch": 2.85, + "learning_rate": 3.7031940123053997e-09, + "logits/chosen": 0.20005813241004944, + "logits/rejected": 0.1966632455587387, + "logps/chosen": -378.98028564453125, + "logps/rejected": -298.57147216796875, + "loss": 0.6502, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.11689343303442001, + "rewards/margins": 0.11867674440145493, + "rewards/rejected": -0.23557019233703613, + "step": 5450 + }, + { + "epoch": 2.86, + "learning_rate": 3.4467004652442842e-09, + "logits/chosen": 0.27505481243133545, + "logits/rejected": 0.289678156375885, + "logps/chosen": -343.13421630859375, + "logps/rejected": -371.6892395019531, + "loss": 0.6576, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1668560802936554, + "rewards/margins": 0.056518711149692535, + "rewards/rejected": -0.22337479889392853, + "step": 5460 + }, + { + "epoch": 2.86, + "learning_rate": 3.1993496960127653e-09, + "logits/chosen": 0.3392692506313324, + "logits/rejected": 0.39931219816207886, + "logps/chosen": -337.546142578125, + "logps/rejected": -295.0710754394531, + "loss": 0.6407, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.15826204419136047, + "rewards/margins": 0.12155953794717789, + "rewards/rejected": -0.27982157468795776, + "step": 5470 + }, + { + "epoch": 2.87, + "learning_rate": 2.9611508769606663e-09, + "logits/chosen": 0.3724585175514221, + "logits/rejected": 0.352268248796463, + "logps/chosen": -331.1094055175781, + "logps/rejected": -318.9394226074219, + "loss": 0.6465, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.15400220453739166, + "rewards/margins": 0.11374969780445099, + "rewards/rejected": -0.26775187253952026, + "step": 5480 + }, + { + "epoch": 2.87, + "learning_rate": 2.7321128410620344e-09, + "logits/chosen": 0.27805405855178833, + "logits/rejected": 0.2672078013420105, + "logps/chosen": -279.8021545410156, + "logps/rejected": -265.70623779296875, + "loss": 0.6481, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.168594628572464, + "rewards/margins": 0.07576708495616913, + "rewards/rejected": -0.2443617284297943, + "step": 5490 + }, + { + "epoch": 2.88, + "learning_rate": 2.5122440815873724e-09, + "logits/chosen": 0.17598295211791992, + "logits/rejected": 0.17596113681793213, + "logps/chosen": -323.70770263671875, + "logps/rejected": -322.7217102050781, + "loss": 0.6518, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12877288460731506, + "rewards/margins": 0.17089615762233734, + "rewards/rejected": -0.2996690571308136, + "step": 5500 + }, + { + "epoch": 2.88, + "learning_rate": 2.301552751788838e-09, + "logits/chosen": 0.1916041374206543, + "logits/rejected": 0.2576969563961029, + "logps/chosen": -275.68389892578125, + "logps/rejected": -232.2618865966797, + "loss": 0.6408, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.16759951412677765, + "rewards/margins": 0.09753932058811188, + "rewards/rejected": -0.26513880491256714, + "step": 5510 + }, + { + "epoch": 2.89, + "learning_rate": 2.1000466645978433e-09, + "logits/chosen": 0.24124963581562042, + "logits/rejected": 0.26684752106666565, + "logps/chosen": -298.94512939453125, + "logps/rejected": -264.2241516113281, + "loss": 0.6566, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1678083837032318, + "rewards/margins": 0.09957209974527359, + "rewards/rejected": -0.2673804759979248, + "step": 5520 + }, + { + "epoch": 2.89, + "learning_rate": 1.9077332923353728e-09, + "logits/chosen": 0.2285464107990265, + "logits/rejected": 0.2565908133983612, + "logps/chosen": -353.0976867675781, + "logps/rejected": -318.3909606933594, + "loss": 0.6449, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.12425023317337036, + "rewards/margins": 0.18167167901992798, + "rewards/rejected": -0.30592188239097595, + "step": 5530 + }, + { + "epoch": 2.9, + "learning_rate": 1.7246197664347872e-09, + "logits/chosen": 0.26658621430397034, + "logits/rejected": 0.2418135702610016, + "logps/chosen": -272.8917236328125, + "logps/rejected": -229.7845001220703, + "loss": 0.6553, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1345643848180771, + "rewards/margins": 0.13201124966144562, + "rewards/rejected": -0.2665756344795227, + "step": 5540 + }, + { + "epoch": 2.9, + "learning_rate": 1.5507128771775346e-09, + "logits/chosen": 0.12898774445056915, + "logits/rejected": 0.2068668156862259, + "logps/chosen": -326.1919860839844, + "logps/rejected": -342.0452575683594, + "loss": 0.6396, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.19251129031181335, + "rewards/margins": 0.06093855947256088, + "rewards/rejected": -0.25344985723495483, + "step": 5550 + }, + { + "epoch": 2.91, + "learning_rate": 1.3860190734411858e-09, + "logits/chosen": 0.1829042136669159, + "logits/rejected": 0.19009463489055634, + "logps/chosen": -242.2975616455078, + "logps/rejected": -209.1060333251953, + "loss": 0.6528, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.1318592131137848, + "rewards/margins": 0.10238520056009293, + "rewards/rejected": -0.2342444211244583, + "step": 5560 + }, + { + "epoch": 2.91, + "learning_rate": 1.2305444624604034e-09, + "logits/chosen": 0.21027176082134247, + "logits/rejected": 0.21280896663665771, + "logps/chosen": -309.7834167480469, + "logps/rejected": -266.2825012207031, + "loss": 0.6492, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15875230729579926, + "rewards/margins": 0.10218574851751328, + "rewards/rejected": -0.2609381079673767, + "step": 5570 + }, + { + "epoch": 2.92, + "learning_rate": 1.0842948096004835e-09, + "logits/chosen": 0.2691759467124939, + "logits/rejected": 0.2674081325531006, + "logps/chosen": -276.37274169921875, + "logps/rejected": -237.9047393798828, + "loss": 0.6444, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.11138792335987091, + "rewards/margins": 0.1745198369026184, + "rewards/rejected": -0.2859077453613281, + "step": 5580 + }, + { + "epoch": 2.93, + "learning_rate": 9.472755381434161e-10, + "logits/chosen": 0.32743868231773376, + "logits/rejected": 0.31935983896255493, + "logps/chosen": -258.36651611328125, + "logps/rejected": -276.4884948730469, + "loss": 0.6507, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.19904500246047974, + "rewards/margins": 0.02864205464720726, + "rewards/rejected": -0.2276870757341385, + "step": 5590 + }, + { + "epoch": 2.93, + "learning_rate": 8.194917290869907e-10, + "logits/chosen": 0.28455591201782227, + "logits/rejected": 0.2898333966732025, + "logps/chosen": -334.1565856933594, + "logps/rejected": -255.84130859375, + "loss": 0.635, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.15140490233898163, + "rewards/margins": 0.15295550227165222, + "rewards/rejected": -0.30436041951179504, + "step": 5600 + }, + { + "epoch": 2.94, + "learning_rate": 7.009481209561685e-10, + "logits/chosen": 0.26674506068229675, + "logits/rejected": 0.24132461845874786, + "logps/chosen": -368.4808349609375, + "logps/rejected": -337.98358154296875, + "loss": 0.6436, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.16283926367759705, + "rewards/margins": 0.06844694167375565, + "rewards/rejected": -0.2312861979007721, + "step": 5610 + }, + { + "epoch": 2.94, + "learning_rate": 5.916491096275845e-10, + "logits/chosen": 0.21892204880714417, + "logits/rejected": 0.2129373550415039, + "logps/chosen": -298.697998046875, + "logps/rejected": -279.541015625, + "loss": 0.6529, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.2142627239227295, + "rewards/margins": 0.043218888342380524, + "rewards/rejected": -0.2574816346168518, + "step": 5620 + }, + { + "epoch": 2.95, + "learning_rate": 4.915987481662887e-10, + "logits/chosen": 0.24894659221172333, + "logits/rejected": 0.26731112599372864, + "logps/chosen": -295.58404541015625, + "logps/rejected": -274.62158203125, + "loss": 0.6419, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1257566511631012, + "rewards/margins": 0.1312040090560913, + "rewards/rejected": -0.2569606900215149, + "step": 5630 + }, + { + "epoch": 2.95, + "learning_rate": 4.0080074667570017e-10, + "logits/chosen": 0.29957491159439087, + "logits/rejected": 0.3261059820652008, + "logps/chosen": -354.64599609375, + "logps/rejected": -299.35784912109375, + "loss": 0.6496, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.16229000687599182, + "rewards/margins": 0.11383986473083496, + "rewards/rejected": -0.2761298716068268, + "step": 5640 + }, + { + "epoch": 2.96, + "learning_rate": 3.1925847215980017e-10, + "logits/chosen": 0.1392301321029663, + "logits/rejected": 0.21554407477378845, + "logps/chosen": -423.4371032714844, + "logps/rejected": -379.70635986328125, + "loss": 0.6422, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.16402511298656464, + "rewards/margins": 0.11438943445682526, + "rewards/rejected": -0.2784145474433899, + "step": 5650 + }, + { + "epoch": 2.96, + "learning_rate": 2.469749483985095e-10, + "logits/chosen": 0.3565462827682495, + "logits/rejected": 0.36484184861183167, + "logps/chosen": -282.6678771972656, + "logps/rejected": -300.89361572265625, + "loss": 0.6606, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.17763136327266693, + "rewards/margins": 0.06572236865758896, + "rewards/rejected": -0.2433536946773529, + "step": 5660 + }, + { + "epoch": 2.97, + "learning_rate": 1.8395285583530652e-10, + "logits/chosen": 0.24649211764335632, + "logits/rejected": 0.2343049943447113, + "logps/chosen": -367.00567626953125, + "logps/rejected": -310.95733642578125, + "loss": 0.6447, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11231978982686996, + "rewards/margins": 0.1657448410987854, + "rewards/rejected": -0.27806463837623596, + "step": 5670 + }, + { + "epoch": 2.97, + "learning_rate": 1.3019453147805614e-10, + "logits/chosen": 0.29656416177749634, + "logits/rejected": 0.2665735185146332, + "logps/chosen": -250.88455200195312, + "logps/rejected": -238.7122802734375, + "loss": 0.6589, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1941298097372055, + "rewards/margins": 0.03171468526124954, + "rewards/rejected": -0.22584450244903564, + "step": 5680 + }, + { + "epoch": 2.98, + "learning_rate": 8.570196881216297e-11, + "logits/chosen": 0.3129049837589264, + "logits/rejected": 0.2954845726490021, + "logps/chosen": -409.77923583984375, + "logps/rejected": -367.190185546875, + "loss": 0.6429, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10958588123321533, + "rewards/margins": 0.10764807462692261, + "rewards/rejected": -0.21723394095897675, + "step": 5690 + }, + { + "epoch": 2.98, + "learning_rate": 5.0476817726852194e-11, + "logits/chosen": 0.2936273217201233, + "logits/rejected": 0.27621278166770935, + "logps/chosen": -325.35809326171875, + "logps/rejected": -317.4355163574219, + "loss": 0.6502, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1530463546514511, + "rewards/margins": 0.14051346480846405, + "rewards/rejected": -0.2935597896575928, + "step": 5700 + }, + { + "epoch": 2.99, + "learning_rate": 2.4520384453746712e-11, + "logits/chosen": 0.31328773498535156, + "logits/rejected": 0.23820796608924866, + "logps/chosen": -366.4006042480469, + "logps/rejected": -264.0309753417969, + "loss": 0.6488, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1539250612258911, + "rewards/margins": 0.1407986581325531, + "rewards/rejected": -0.2947237193584442, + "step": 5710 + }, + { + "epoch": 2.99, + "learning_rate": 7.833631518627815e-12, + "logits/chosen": 0.197910338640213, + "logits/rejected": 0.16825783252716064, + "logps/chosen": -319.4342041015625, + "logps/rejected": -324.4068908691406, + "loss": 0.6445, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.15306967496871948, + "rewards/margins": 0.12025105953216553, + "rewards/rejected": -0.273320734500885, + "step": 5720 + }, + { + "epoch": 3.0, + "learning_rate": 4.1717770565830033e-13, + "logits/chosen": 0.24477490782737732, + "logits/rejected": 0.24169504642486572, + "logps/chosen": -302.2257995605469, + "logps/rejected": -296.517822265625, + "loss": 0.6535, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.13678932189941406, + "rewards/margins": 0.1357504278421402, + "rewards/rejected": -0.27253976464271545, + "step": 5730 + }, + { + "epoch": 3.0, + "step": 5733, + "total_flos": 0.0, + "train_loss": 0.6659654320045311, + "train_runtime": 38381.4335, + "train_samples_per_second": 4.778, + "train_steps_per_second": 0.149 + } + ], + "logging_steps": 10, + "max_steps": 5733, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}