diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7394 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 5250, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 9.523809523809524e-09, + "logits/chosen": -1.5964443683624268, + "logits/rejected": -1.3291687965393066, + "logps/chosen": -474.3575134277344, + "logps/rejected": -663.2249755859375, + "loss": 0.3087, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 9.523809523809525e-08, + "logits/chosen": -1.6813511848449707, + "logits/rejected": -1.2188762426376343, + "logps/chosen": -449.58489990234375, + "logps/rejected": -889.5899047851562, + "loss": 0.2155, + "rewards/accuracies": 0.3888888955116272, + "rewards/chosen": 0.00010568237485131249, + "rewards/margins": 6.739808304701e-05, + "rewards/rejected": 3.8284317270154133e-05, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 1.904761904761905e-07, + "logits/chosen": -1.7874151468276978, + "logits/rejected": -1.1624069213867188, + "logps/chosen": -428.51300048828125, + "logps/rejected": -803.0301513671875, + "loss": 0.2287, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.00012383742432575673, + "rewards/margins": 0.0009384436416439712, + "rewards/rejected": -0.0010622810805216432, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 2.8571428571428575e-07, + "logits/chosen": -1.5539613962173462, + "logits/rejected": -1.1322476863861084, + "logps/chosen": -443.5193786621094, + "logps/rejected": -830.2403564453125, + "loss": 0.2281, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 0.0007974267937242985, + "rewards/margins": 0.0017057094955816865, + "rewards/rejected": -0.0009082824690267444, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 3.80952380952381e-07, + "logits/chosen": -1.640062689781189, + "logits/rejected": -1.2586108446121216, + "logps/chosen": -430.81805419921875, + "logps/rejected": -854.3474731445312, + "loss": 0.2003, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.0021123916376382113, + "rewards/margins": 0.005200219340622425, + "rewards/rejected": -0.003087828401476145, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 4.7619047619047623e-07, + "logits/chosen": -1.5159130096435547, + "logits/rejected": -1.0649659633636475, + "logps/chosen": -482.67852783203125, + "logps/rejected": -778.0474853515625, + "loss": 0.1952, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.0075163752771914005, + "rewards/margins": 0.011120806448161602, + "rewards/rejected": -0.003604432102292776, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 5.714285714285715e-07, + "logits/chosen": -1.5851542949676514, + "logits/rejected": -1.1346595287322998, + "logps/chosen": -425.6817932128906, + "logps/rejected": -779.3834228515625, + "loss": 0.183, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.01108284667134285, + "rewards/margins": 0.01969107612967491, + "rewards/rejected": -0.008608227595686913, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 6.666666666666667e-07, + "logits/chosen": -1.533231496810913, + "logits/rejected": -1.0766208171844482, + "logps/chosen": -488.9933166503906, + "logps/rejected": -895.2310791015625, + "loss": 0.1867, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.017419463023543358, + "rewards/margins": 0.03203754127025604, + "rewards/rejected": -0.014618076384067535, + "step": 70 + }, + { + "epoch": 0.02, + "learning_rate": 7.61904761904762e-07, + "logits/chosen": -1.6760826110839844, + "logits/rejected": -0.9335016012191772, + "logps/chosen": -451.7982482910156, + "logps/rejected": -854.0350341796875, + "loss": 0.1739, + "rewards/accuracies": 0.875, + "rewards/chosen": 0.026161080226302147, + "rewards/margins": 0.05273517966270447, + "rewards/rejected": -0.02657409943640232, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 8.571428571428572e-07, + "logits/chosen": -1.7192933559417725, + "logits/rejected": -1.0688517093658447, + "logps/chosen": -404.2705078125, + "logps/rejected": -811.7678833007812, + "loss": 0.1649, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.027517270296812057, + "rewards/margins": 0.05363103747367859, + "rewards/rejected": -0.02611376717686653, + "step": 90 + }, + { + "epoch": 0.02, + "learning_rate": 9.523809523809525e-07, + "logits/chosen": -1.8428962230682373, + "logits/rejected": -1.1792418956756592, + "logps/chosen": -430.27227783203125, + "logps/rejected": -801.3963623046875, + "loss": 0.171, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.007343721576035023, + "rewards/margins": 0.0735684260725975, + "rewards/rejected": -0.06622470915317535, + "step": 100 + }, + { + "epoch": 0.02, + "learning_rate": 1.0476190476190478e-06, + "logits/chosen": -1.746883749961853, + "logits/rejected": -1.1373497247695923, + "logps/chosen": -525.6906127929688, + "logps/rejected": -1003.6932373046875, + "loss": 0.1356, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.020451098680496216, + "rewards/margins": 0.09704919159412384, + "rewards/rejected": -0.11750028282403946, + "step": 110 + }, + { + "epoch": 0.02, + "learning_rate": 1.142857142857143e-06, + "logits/chosen": -1.8047354221343994, + "logits/rejected": -1.0603820085525513, + "logps/chosen": -560.2908325195312, + "logps/rejected": -1008.1901245117188, + "loss": 0.1192, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.043726347386837006, + "rewards/margins": 0.1116614118218422, + "rewards/rejected": -0.1553877592086792, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 1.2380952380952382e-06, + "logits/chosen": -1.6039708852767944, + "logits/rejected": -1.0721991062164307, + "logps/chosen": -491.07830810546875, + "logps/rejected": -932.6652221679688, + "loss": 0.1444, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.03485647588968277, + "rewards/margins": 0.11112723499536514, + "rewards/rejected": -0.1459837257862091, + "step": 130 + }, + { + "epoch": 0.03, + "learning_rate": 1.3333333333333334e-06, + "logits/chosen": -1.6463550329208374, + "logits/rejected": -0.9608979225158691, + "logps/chosen": -519.837646484375, + "logps/rejected": -954.7674560546875, + "loss": 0.1098, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.04095975309610367, + "rewards/margins": 0.1356854885816574, + "rewards/rejected": -0.17664523422718048, + "step": 140 + }, + { + "epoch": 0.03, + "learning_rate": 1.4285714285714286e-06, + "logits/chosen": -1.7351688146591187, + "logits/rejected": -1.1353156566619873, + "logps/chosen": -493.08966064453125, + "logps/rejected": -1045.0201416015625, + "loss": 0.0944, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.06612655520439148, + "rewards/margins": 0.1745416820049286, + "rewards/rejected": -0.24066825211048126, + "step": 150 + }, + { + "epoch": 0.03, + "learning_rate": 1.523809523809524e-06, + "logits/chosen": -1.659099817276001, + "logits/rejected": -0.8211167454719543, + "logps/chosen": -581.5676879882812, + "logps/rejected": -1053.3389892578125, + "loss": 0.1352, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.10605227947235107, + "rewards/margins": 0.15199507772922516, + "rewards/rejected": -0.25804734230041504, + "step": 160 + }, + { + "epoch": 0.03, + "learning_rate": 1.6190476190476193e-06, + "logits/chosen": -1.67592453956604, + "logits/rejected": -1.0631893873214722, + "logps/chosen": -580.7254028320312, + "logps/rejected": -1190.563720703125, + "loss": 0.0905, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.10428521782159805, + "rewards/margins": 0.2247179001569748, + "rewards/rejected": -0.32900312542915344, + "step": 170 + }, + { + "epoch": 0.03, + "learning_rate": 1.7142857142857145e-06, + "logits/chosen": -1.650418996810913, + "logits/rejected": -0.8839709162712097, + "logps/chosen": -733.577392578125, + "logps/rejected": -1300.351318359375, + "loss": 0.095, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1777438372373581, + "rewards/margins": 0.22024521231651306, + "rewards/rejected": -0.3979890048503876, + "step": 180 + }, + { + "epoch": 0.04, + "learning_rate": 1.8095238095238097e-06, + "logits/chosen": -1.567194938659668, + "logits/rejected": -0.9303449392318726, + "logps/chosen": -665.9202880859375, + "logps/rejected": -1264.822021484375, + "loss": 0.1258, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.19641616940498352, + "rewards/margins": 0.23649337887763977, + "rewards/rejected": -0.4329095482826233, + "step": 190 + }, + { + "epoch": 0.04, + "learning_rate": 1.904761904761905e-06, + "logits/chosen": -1.6757524013519287, + "logits/rejected": -1.3166964054107666, + "logps/chosen": -670.0177001953125, + "logps/rejected": -1232.9072265625, + "loss": 0.1117, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.1455942690372467, + "rewards/margins": 0.193759948015213, + "rewards/rejected": -0.3393542170524597, + "step": 200 + }, + { + "epoch": 0.04, + "learning_rate": 2.0000000000000003e-06, + "logits/chosen": -1.8672746419906616, + "logits/rejected": -1.0123827457427979, + "logps/chosen": -606.1552734375, + "logps/rejected": -1232.156005859375, + "loss": 0.0717, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.08139447122812271, + "rewards/margins": 0.24210381507873535, + "rewards/rejected": -0.3234982490539551, + "step": 210 + }, + { + "epoch": 0.04, + "learning_rate": 2.0952380952380955e-06, + "logits/chosen": -1.8828620910644531, + "logits/rejected": -1.178390383720398, + "logps/chosen": -536.4411010742188, + "logps/rejected": -1053.248779296875, + "loss": 0.1066, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08360230922698975, + "rewards/margins": 0.19688987731933594, + "rewards/rejected": -0.2804921865463257, + "step": 220 + }, + { + "epoch": 0.04, + "learning_rate": 2.1904761904761908e-06, + "logits/chosen": -1.6069672107696533, + "logits/rejected": -1.1681115627288818, + "logps/chosen": -618.42529296875, + "logps/rejected": -1184.3099365234375, + "loss": 0.1236, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.17492257058620453, + "rewards/margins": 0.20526257157325745, + "rewards/rejected": -0.3801851272583008, + "step": 230 + }, + { + "epoch": 0.05, + "learning_rate": 2.285714285714286e-06, + "logits/chosen": -1.6747249364852905, + "logits/rejected": -1.1488839387893677, + "logps/chosen": -539.3297729492188, + "logps/rejected": -1078.1259765625, + "loss": 0.1195, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.13585665822029114, + "rewards/margins": 0.1913582980632782, + "rewards/rejected": -0.32721495628356934, + "step": 240 + }, + { + "epoch": 0.05, + "learning_rate": 2.380952380952381e-06, + "logits/chosen": -1.644960641860962, + "logits/rejected": -1.2164050340652466, + "logps/chosen": -542.1751708984375, + "logps/rejected": -1071.045166015625, + "loss": 0.1381, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13008072972297668, + "rewards/margins": 0.19909824430942535, + "rewards/rejected": -0.32917895913124084, + "step": 250 + }, + { + "epoch": 0.05, + "learning_rate": 2.4761904761904764e-06, + "logits/chosen": -1.7668380737304688, + "logits/rejected": -1.1439602375030518, + "logps/chosen": -615.5252075195312, + "logps/rejected": -1166.810302734375, + "loss": 0.1013, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1580442488193512, + "rewards/margins": 0.22525843977928162, + "rewards/rejected": -0.3833027184009552, + "step": 260 + }, + { + "epoch": 0.05, + "learning_rate": 2.571428571428571e-06, + "logits/chosen": -1.813973069190979, + "logits/rejected": -1.1399123668670654, + "logps/chosen": -712.232421875, + "logps/rejected": -1244.3934326171875, + "loss": 0.1077, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.18287518620491028, + "rewards/margins": 0.2252916842699051, + "rewards/rejected": -0.40816688537597656, + "step": 270 + }, + { + "epoch": 0.05, + "learning_rate": 2.666666666666667e-06, + "logits/chosen": -1.6180734634399414, + "logits/rejected": -1.3701114654541016, + "logps/chosen": -494.2696228027344, + "logps/rejected": -1138.9771728515625, + "loss": 0.1045, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.12325029075145721, + "rewards/margins": 0.23065133392810822, + "rewards/rejected": -0.35390162467956543, + "step": 280 + }, + { + "epoch": 0.06, + "learning_rate": 2.7619047619047625e-06, + "logits/chosen": -1.694424033164978, + "logits/rejected": -1.0708407163619995, + "logps/chosen": -545.6209716796875, + "logps/rejected": -1122.974609375, + "loss": 0.1028, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.10549769550561905, + "rewards/margins": 0.24908974766731262, + "rewards/rejected": -0.35458746552467346, + "step": 290 + }, + { + "epoch": 0.06, + "learning_rate": 2.8571428571428573e-06, + "logits/chosen": -1.7387892007827759, + "logits/rejected": -1.1617491245269775, + "logps/chosen": -610.787841796875, + "logps/rejected": -1241.400146484375, + "loss": 0.0931, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.170683354139328, + "rewards/margins": 0.2684099078178406, + "rewards/rejected": -0.4390932619571686, + "step": 300 + }, + { + "epoch": 0.06, + "learning_rate": 2.9523809523809525e-06, + "logits/chosen": -1.7345823049545288, + "logits/rejected": -1.1376091241836548, + "logps/chosen": -719.1180419921875, + "logps/rejected": -1263.5145263671875, + "loss": 0.1242, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1590810865163803, + "rewards/margins": 0.25278979539871216, + "rewards/rejected": -0.41187089681625366, + "step": 310 + }, + { + "epoch": 0.06, + "learning_rate": 3.047619047619048e-06, + "logits/chosen": -1.6919857263565063, + "logits/rejected": -1.050135612487793, + "logps/chosen": -729.0108642578125, + "logps/rejected": -1165.0267333984375, + "loss": 0.0923, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2034168243408203, + "rewards/margins": 0.22210320830345154, + "rewards/rejected": -0.42552003264427185, + "step": 320 + }, + { + "epoch": 0.06, + "learning_rate": 3.142857142857143e-06, + "logits/chosen": -1.697484016418457, + "logits/rejected": -1.0732462406158447, + "logps/chosen": -671.1806640625, + "logps/rejected": -1204.2303466796875, + "loss": 0.0962, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.21383149921894073, + "rewards/margins": 0.2506061792373657, + "rewards/rejected": -0.4644376337528229, + "step": 330 + }, + { + "epoch": 0.06, + "learning_rate": 3.2380952380952385e-06, + "logits/chosen": -1.5445235967636108, + "logits/rejected": -1.014106035232544, + "logps/chosen": -623.9879760742188, + "logps/rejected": -1408.772705078125, + "loss": 0.042, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.15062114596366882, + "rewards/margins": 0.3381795883178711, + "rewards/rejected": -0.48880070447921753, + "step": 340 + }, + { + "epoch": 0.07, + "learning_rate": 3.3333333333333333e-06, + "logits/chosen": -1.8544213771820068, + "logits/rejected": -1.0226833820343018, + "logps/chosen": -607.8839111328125, + "logps/rejected": -1246.36083984375, + "loss": 0.0648, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11450695991516113, + "rewards/margins": 0.267674058675766, + "rewards/rejected": -0.3821810185909271, + "step": 350 + }, + { + "epoch": 0.07, + "learning_rate": 3.428571428571429e-06, + "logits/chosen": -1.7216541767120361, + "logits/rejected": -0.9856483340263367, + "logps/chosen": -749.8733520507812, + "logps/rejected": -1292.38037109375, + "loss": 0.068, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.231466606259346, + "rewards/margins": 0.22745048999786377, + "rewards/rejected": -0.4589170813560486, + "step": 360 + }, + { + "epoch": 0.07, + "learning_rate": 3.523809523809524e-06, + "logits/chosen": -1.7551990747451782, + "logits/rejected": -1.1457812786102295, + "logps/chosen": -781.4153442382812, + "logps/rejected": -1313.230224609375, + "loss": 0.0727, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.24709895253181458, + "rewards/margins": 0.2493913173675537, + "rewards/rejected": -0.4964902400970459, + "step": 370 + }, + { + "epoch": 0.07, + "learning_rate": 3.6190476190476194e-06, + "logits/chosen": -1.7148735523223877, + "logits/rejected": -1.0211610794067383, + "logps/chosen": -636.7725830078125, + "logps/rejected": -1297.733154296875, + "loss": 0.0641, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2014390230178833, + "rewards/margins": 0.2895970940589905, + "rewards/rejected": -0.4910360872745514, + "step": 380 + }, + { + "epoch": 0.07, + "learning_rate": 3.7142857142857146e-06, + "logits/chosen": -1.7809518575668335, + "logits/rejected": -0.9582511782646179, + "logps/chosen": -719.4249267578125, + "logps/rejected": -1336.9053955078125, + "loss": 0.0912, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.26866966485977173, + "rewards/margins": 0.27918726205825806, + "rewards/rejected": -0.5478569269180298, + "step": 390 + }, + { + "epoch": 0.08, + "learning_rate": 3.80952380952381e-06, + "logits/chosen": -1.4761449098587036, + "logits/rejected": -1.0101737976074219, + "logps/chosen": -658.9013671875, + "logps/rejected": -1370.678466796875, + "loss": 0.0867, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.256266325712204, + "rewards/margins": 0.3092409670352936, + "rewards/rejected": -0.5655072927474976, + "step": 400 + }, + { + "epoch": 0.08, + "learning_rate": 3.9047619047619055e-06, + "logits/chosen": -1.4691818952560425, + "logits/rejected": -1.0662410259246826, + "logps/chosen": -654.7586669921875, + "logps/rejected": -1156.6165771484375, + "loss": 0.1032, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.23384161293506622, + "rewards/margins": 0.2041410505771637, + "rewards/rejected": -0.4379826486110687, + "step": 410 + }, + { + "epoch": 0.08, + "learning_rate": 4.000000000000001e-06, + "logits/chosen": -1.5862969160079956, + "logits/rejected": -1.1011335849761963, + "logps/chosen": -621.4929809570312, + "logps/rejected": -1237.9970703125, + "loss": 0.0978, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2105666697025299, + "rewards/margins": 0.2511122226715088, + "rewards/rejected": -0.4616789221763611, + "step": 420 + }, + { + "epoch": 0.08, + "learning_rate": 4.095238095238096e-06, + "logits/chosen": -1.661075234413147, + "logits/rejected": -1.0312578678131104, + "logps/chosen": -711.7174682617188, + "logps/rejected": -1414.0506591796875, + "loss": 0.0657, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.22503916919231415, + "rewards/margins": 0.27457931637763977, + "rewards/rejected": -0.4996185302734375, + "step": 430 + }, + { + "epoch": 0.08, + "learning_rate": 4.190476190476191e-06, + "logits/chosen": -1.6692326068878174, + "logits/rejected": -0.7681063413619995, + "logps/chosen": -756.2827758789062, + "logps/rejected": -1376.8885498046875, + "loss": 0.0962, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22421880066394806, + "rewards/margins": 0.2793051600456238, + "rewards/rejected": -0.5035240054130554, + "step": 440 + }, + { + "epoch": 0.09, + "learning_rate": 4.2857142857142855e-06, + "logits/chosen": -1.5787713527679443, + "logits/rejected": -1.0791809558868408, + "logps/chosen": -604.9113159179688, + "logps/rejected": -1252.309814453125, + "loss": 0.0779, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.1460736095905304, + "rewards/margins": 0.26809030771255493, + "rewards/rejected": -0.4141639769077301, + "step": 450 + }, + { + "epoch": 0.09, + "learning_rate": 4.3809523809523815e-06, + "logits/chosen": -1.7312390804290771, + "logits/rejected": -0.9138051867485046, + "logps/chosen": -715.1556396484375, + "logps/rejected": -1347.625, + "loss": 0.0806, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.20960795879364014, + "rewards/margins": 0.2895205020904541, + "rewards/rejected": -0.49912840127944946, + "step": 460 + }, + { + "epoch": 0.09, + "learning_rate": 4.476190476190477e-06, + "logits/chosen": -1.5289274454116821, + "logits/rejected": -1.2004892826080322, + "logps/chosen": -628.0669555664062, + "logps/rejected": -1309.748779296875, + "loss": 0.0994, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1908506602048874, + "rewards/margins": 0.2644248604774475, + "rewards/rejected": -0.4552755355834961, + "step": 470 + }, + { + "epoch": 0.09, + "learning_rate": 4.571428571428572e-06, + "logits/chosen": -1.7772254943847656, + "logits/rejected": -1.2134960889816284, + "logps/chosen": -578.4381103515625, + "logps/rejected": -1015.9026489257812, + "loss": 0.1204, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.12097190320491791, + "rewards/margins": 0.13818739354610443, + "rewards/rejected": -0.25915926694869995, + "step": 480 + }, + { + "epoch": 0.09, + "learning_rate": 4.666666666666667e-06, + "logits/chosen": -1.963395357131958, + "logits/rejected": -1.1699910163879395, + "logps/chosen": -568.7506103515625, + "logps/rejected": -1148.7525634765625, + "loss": 0.0884, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.12507882714271545, + "rewards/margins": 0.23613107204437256, + "rewards/rejected": -0.3612098693847656, + "step": 490 + }, + { + "epoch": 0.1, + "learning_rate": 4.761904761904762e-06, + "logits/chosen": -1.768434762954712, + "logits/rejected": -1.1358940601348877, + "logps/chosen": -684.4830322265625, + "logps/rejected": -1343.7208251953125, + "loss": 0.1135, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.18173658847808838, + "rewards/margins": 0.26491695642471313, + "rewards/rejected": -0.4466535449028015, + "step": 500 + }, + { + "epoch": 0.1, + "learning_rate": 4.857142857142858e-06, + "logits/chosen": -1.6421762704849243, + "logits/rejected": -0.9460729360580444, + "logps/chosen": -699.533935546875, + "logps/rejected": -1446.2193603515625, + "loss": 0.0739, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.24191920459270477, + "rewards/margins": 0.28883248567581177, + "rewards/rejected": -0.530751645565033, + "step": 510 + }, + { + "epoch": 0.1, + "learning_rate": 4.952380952380953e-06, + "logits/chosen": -1.6789777278900146, + "logits/rejected": -1.0460736751556396, + "logps/chosen": -814.0193481445312, + "logps/rejected": -1421.7989501953125, + "loss": 0.0817, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2921529710292816, + "rewards/margins": 0.2904835343360901, + "rewards/rejected": -0.5826364755630493, + "step": 520 + }, + { + "epoch": 0.1, + "learning_rate": 4.999986185163754e-06, + "logits/chosen": -1.9284679889678955, + "logits/rejected": -1.4057942628860474, + "logps/chosen": -558.6351318359375, + "logps/rejected": -1179.9552001953125, + "loss": 0.1042, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.15910789370536804, + "rewards/margins": 0.24427099525928497, + "rewards/rejected": -0.4033789038658142, + "step": 530 + }, + { + "epoch": 0.1, + "learning_rate": 4.999875667389858e-06, + "logits/chosen": -1.66916823387146, + "logits/rejected": -1.0905592441558838, + "logps/chosen": -606.5189208984375, + "logps/rejected": -1062.45947265625, + "loss": 0.1271, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.12606561183929443, + "rewards/margins": 0.18360337615013123, + "rewards/rejected": -0.30966901779174805, + "step": 540 + }, + { + "epoch": 0.1, + "learning_rate": 4.999654636727765e-06, + "logits/chosen": -1.606479287147522, + "logits/rejected": -1.17953622341156, + "logps/chosen": -577.0275268554688, + "logps/rejected": -1118.5433349609375, + "loss": 0.0908, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.1509220004081726, + "rewards/margins": 0.21017400920391083, + "rewards/rejected": -0.36109599471092224, + "step": 550 + }, + { + "epoch": 0.11, + "learning_rate": 4.999323102948655e-06, + "logits/chosen": -1.8047797679901123, + "logits/rejected": -1.2963651418685913, + "logps/chosen": -632.2808837890625, + "logps/rejected": -1058.11083984375, + "loss": 0.0952, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.16345271468162537, + "rewards/margins": 0.18198499083518982, + "rewards/rejected": -0.3454377055168152, + "step": 560 + }, + { + "epoch": 0.11, + "learning_rate": 4.998881080708759e-06, + "logits/chosen": -1.8658708333969116, + "logits/rejected": -1.3321269750595093, + "logps/chosen": -554.2672119140625, + "logps/rejected": -1096.3828125, + "loss": 0.0875, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14818139374256134, + "rewards/margins": 0.24486231803894043, + "rewards/rejected": -0.3930436968803406, + "step": 570 + }, + { + "epoch": 0.11, + "learning_rate": 4.998328589548711e-06, + "logits/chosen": -1.7779667377471924, + "logits/rejected": -1.069665551185608, + "logps/chosen": -675.4833984375, + "logps/rejected": -1202.579833984375, + "loss": 0.1088, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.17311152815818787, + "rewards/margins": 0.23328053951263428, + "rewards/rejected": -0.40639209747314453, + "step": 580 + }, + { + "epoch": 0.11, + "learning_rate": 4.997665653892682e-06, + "logits/chosen": -1.888587236404419, + "logits/rejected": -1.1475058794021606, + "logps/chosen": -627.3590698242188, + "logps/rejected": -1246.740478515625, + "loss": 0.0582, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.17301705479621887, + "rewards/margins": 0.28004103899002075, + "rewards/rejected": -0.4530580937862396, + "step": 590 + }, + { + "epoch": 0.11, + "learning_rate": 4.996892303047306e-06, + "logits/chosen": -1.7172037363052368, + "logits/rejected": -1.1663614511489868, + "logps/chosen": -652.0533447265625, + "logps/rejected": -1193.7652587890625, + "loss": 0.0975, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2103462666273117, + "rewards/margins": 0.2457568347454071, + "rewards/rejected": -0.4561030864715576, + "step": 600 + }, + { + "epoch": 0.12, + "learning_rate": 4.996008571200375e-06, + "logits/chosen": -1.6690038442611694, + "logits/rejected": -1.0809996128082275, + "logps/chosen": -702.9678955078125, + "logps/rejected": -1331.991455078125, + "loss": 0.0731, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.1795978993177414, + "rewards/margins": 0.2755267918109894, + "rewards/rejected": -0.4551246762275696, + "step": 610 + }, + { + "epoch": 0.12, + "learning_rate": 4.995014497419336e-06, + "logits/chosen": -1.9420398473739624, + "logits/rejected": -1.0578997135162354, + "logps/chosen": -653.03759765625, + "logps/rejected": -1236.6177978515625, + "loss": 0.0922, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.15195727348327637, + "rewards/margins": 0.25116264820098877, + "rewards/rejected": -0.4031199514865875, + "step": 620 + }, + { + "epoch": 0.12, + "learning_rate": 4.993910125649561e-06, + "logits/chosen": -1.5005296468734741, + "logits/rejected": -1.0164679288864136, + "logps/chosen": -615.0857543945312, + "logps/rejected": -1095.970703125, + "loss": 0.1492, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.19480828940868378, + "rewards/margins": 0.16079875826835632, + "rewards/rejected": -0.3556070625782013, + "step": 630 + }, + { + "epoch": 0.12, + "learning_rate": 4.992695504712402e-06, + "logits/chosen": -1.7464653253555298, + "logits/rejected": -1.2301980257034302, + "logps/chosen": -601.227783203125, + "logps/rejected": -1081.1474609375, + "loss": 0.1075, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.12417513132095337, + "rewards/margins": 0.20312102138996124, + "rewards/rejected": -0.3272961378097534, + "step": 640 + }, + { + "epoch": 0.12, + "learning_rate": 4.9913706883030385e-06, + "logits/chosen": -1.81307053565979, + "logits/rejected": -1.2394088506698608, + "logps/chosen": -603.0763549804688, + "logps/rejected": -1259.51318359375, + "loss": 0.0631, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1690419614315033, + "rewards/margins": 0.2731035649776459, + "rewards/rejected": -0.44214552640914917, + "step": 650 + }, + { + "epoch": 0.13, + "learning_rate": 4.989935734988098e-06, + "logits/chosen": -1.7271366119384766, + "logits/rejected": -1.0474259853363037, + "logps/chosen": -673.4783325195312, + "logps/rejected": -1239.798095703125, + "loss": 0.0771, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.21036569774150848, + "rewards/margins": 0.2717295289039612, + "rewards/rejected": -0.48209524154663086, + "step": 660 + }, + { + "epoch": 0.13, + "learning_rate": 4.988390708203068e-06, + "logits/chosen": -1.5817875862121582, + "logits/rejected": -1.0434249639511108, + "logps/chosen": -648.2857666015625, + "logps/rejected": -1361.281982421875, + "loss": 0.0946, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.22290560603141785, + "rewards/margins": 0.29205870628356934, + "rewards/rejected": -0.5149643421173096, + "step": 670 + }, + { + "epoch": 0.13, + "learning_rate": 4.9867356762494955e-06, + "logits/chosen": -1.653638243675232, + "logits/rejected": -0.9126855731010437, + "logps/chosen": -761.7947998046875, + "logps/rejected": -1445.9437255859375, + "loss": 0.0596, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2962535321712494, + "rewards/margins": 0.299728125333786, + "rewards/rejected": -0.5959817171096802, + "step": 680 + }, + { + "epoch": 0.13, + "learning_rate": 4.984970712291963e-06, + "logits/chosen": -1.6935688257217407, + "logits/rejected": -0.995235800743103, + "logps/chosen": -836.0851440429688, + "logps/rejected": -1352.14892578125, + "loss": 0.1075, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.33499711751937866, + "rewards/margins": 0.24044211208820343, + "rewards/rejected": -0.5754392743110657, + "step": 690 + }, + { + "epoch": 0.13, + "learning_rate": 4.983095894354858e-06, + "logits/chosen": -1.7480075359344482, + "logits/rejected": -1.0178004503250122, + "logps/chosen": -839.4945068359375, + "logps/rejected": -1367.689697265625, + "loss": 0.0708, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.285785973072052, + "rewards/margins": 0.2461218386888504, + "rewards/rejected": -0.5319077372550964, + "step": 700 + }, + { + "epoch": 0.14, + "learning_rate": 4.981111305318918e-06, + "logits/chosen": -1.6903988122940063, + "logits/rejected": -1.0864533185958862, + "logps/chosen": -639.9157104492188, + "logps/rejected": -1278.444580078125, + "loss": 0.1003, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.18382251262664795, + "rewards/margins": 0.2932285666465759, + "rewards/rejected": -0.4770510792732239, + "step": 710 + }, + { + "epoch": 0.14, + "learning_rate": 4.979017032917576e-06, + "logits/chosen": -1.6954967975616455, + "logits/rejected": -1.1473253965377808, + "logps/chosen": -663.8118896484375, + "logps/rejected": -1232.072509765625, + "loss": 0.0869, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.16720175743103027, + "rewards/margins": 0.2587115168571472, + "rewards/rejected": -0.4259132444858551, + "step": 720 + }, + { + "epoch": 0.14, + "learning_rate": 4.97681316973307e-06, + "logits/chosen": -1.6922680139541626, + "logits/rejected": -1.0211702585220337, + "logps/chosen": -684.892333984375, + "logps/rejected": -1208.362060546875, + "loss": 0.0882, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.18825049698352814, + "rewards/margins": 0.23126792907714844, + "rewards/rejected": -0.4195184111595154, + "step": 730 + }, + { + "epoch": 0.14, + "learning_rate": 4.9744998131923625e-06, + "logits/chosen": -1.9704052209854126, + "logits/rejected": -1.2731428146362305, + "logps/chosen": -636.3994140625, + "logps/rejected": -1263.0999755859375, + "loss": 0.0978, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.14151528477668762, + "rewards/margins": 0.2585150897502899, + "rewards/rejected": -0.4000304341316223, + "step": 740 + }, + { + "epoch": 0.14, + "learning_rate": 4.9720770655628216e-06, + "logits/chosen": -1.7848608493804932, + "logits/rejected": -1.0659257173538208, + "logps/chosen": -657.3236083984375, + "logps/rejected": -1317.057373046875, + "loss": 0.0631, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15722636878490448, + "rewards/margins": 0.2890471816062927, + "rewards/rejected": -0.4462736248970032, + "step": 750 + }, + { + "epoch": 0.14, + "learning_rate": 4.969545033947711e-06, + "logits/chosen": -1.6175647974014282, + "logits/rejected": -1.0041881799697876, + "logps/chosen": -588.1198120117188, + "logps/rejected": -1212.6221923828125, + "loss": 0.0978, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.145894855260849, + "rewards/margins": 0.26582056283950806, + "rewards/rejected": -0.41171541810035706, + "step": 760 + }, + { + "epoch": 0.15, + "learning_rate": 4.966903830281449e-06, + "logits/chosen": -1.6709251403808594, + "logits/rejected": -1.0464791059494019, + "logps/chosen": -646.2835693359375, + "logps/rejected": -1256.691650390625, + "loss": 0.0764, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12737174332141876, + "rewards/margins": 0.2582496106624603, + "rewards/rejected": -0.3856213688850403, + "step": 770 + }, + { + "epoch": 0.15, + "learning_rate": 4.964153571324658e-06, + "logits/chosen": -1.998659372329712, + "logits/rejected": -1.257143259048462, + "logps/chosen": -553.2044677734375, + "logps/rejected": -1021.4732666015625, + "loss": 0.0801, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.10653400421142578, + "rewards/margins": 0.22628924250602722, + "rewards/rejected": -0.3328232169151306, + "step": 780 + }, + { + "epoch": 0.15, + "learning_rate": 4.96129437865901e-06, + "logits/chosen": -1.7126219272613525, + "logits/rejected": -1.3472967147827148, + "logps/chosen": -638.3450927734375, + "logps/rejected": -1300.0621337890625, + "loss": 0.0974, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.18389222025871277, + "rewards/margins": 0.24962754547595978, + "rewards/rejected": -0.43351978063583374, + "step": 790 + }, + { + "epoch": 0.15, + "learning_rate": 4.958326378681849e-06, + "logits/chosen": -1.5197988748550415, + "logits/rejected": -1.0763747692108154, + "logps/chosen": -741.8438720703125, + "logps/rejected": -1362.937744140625, + "loss": 0.0525, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2865040600299835, + "rewards/margins": 0.27982592582702637, + "rewards/rejected": -0.5663300156593323, + "step": 800 + }, + { + "epoch": 0.15, + "learning_rate": 4.955249702600598e-06, + "logits/chosen": -1.8825486898422241, + "logits/rejected": -0.9662033319473267, + "logps/chosen": -796.578857421875, + "logps/rejected": -1616.077880859375, + "loss": 0.0481, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.3106931746006012, + "rewards/margins": 0.35244977474212646, + "rewards/rejected": -0.66314297914505, + "step": 810 + }, + { + "epoch": 0.16, + "learning_rate": 4.952064486426965e-06, + "logits/chosen": -1.5078961849212646, + "logits/rejected": -0.939461886882782, + "logps/chosen": -729.2574462890625, + "logps/rejected": -1372.853759765625, + "loss": 0.0811, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.24250641465187073, + "rewards/margins": 0.2867491841316223, + "rewards/rejected": -0.5292556285858154, + "step": 820 + }, + { + "epoch": 0.16, + "learning_rate": 4.948770870970929e-06, + "logits/chosen": -1.7515017986297607, + "logits/rejected": -1.2939387559890747, + "logps/chosen": -522.4594116210938, + "logps/rejected": -1244.282958984375, + "loss": 0.0794, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1327625960111618, + "rewards/margins": 0.30878084897994995, + "rewards/rejected": -0.44154348969459534, + "step": 830 + }, + { + "epoch": 0.16, + "learning_rate": 4.9453690018345144e-06, + "logits/chosen": -1.435917854309082, + "logits/rejected": -0.7938503623008728, + "logps/chosen": -638.5426635742188, + "logps/rejected": -1297.34521484375, + "loss": 0.0819, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.22242049872875214, + "rewards/margins": 0.2777343988418579, + "rewards/rejected": -0.5001549124717712, + "step": 840 + }, + { + "epoch": 0.16, + "learning_rate": 4.941859029405354e-06, + "logits/chosen": -1.5241379737854004, + "logits/rejected": -0.9835844039916992, + "logps/chosen": -676.1991577148438, + "logps/rejected": -1275.102294921875, + "loss": 0.09, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2034449577331543, + "rewards/margins": 0.26251763105392456, + "rewards/rejected": -0.46596255898475647, + "step": 850 + }, + { + "epoch": 0.16, + "learning_rate": 4.938241108850039e-06, + "logits/chosen": -1.6833593845367432, + "logits/rejected": -1.1207275390625, + "logps/chosen": -641.3517456054688, + "logps/rejected": -1251.5594482421875, + "loss": 0.0755, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.18910585343837738, + "rewards/margins": 0.26478061079978943, + "rewards/rejected": -0.4538864493370056, + "step": 860 + }, + { + "epoch": 0.17, + "learning_rate": 4.934515400107266e-06, + "logits/chosen": -1.549037218093872, + "logits/rejected": -1.1462652683258057, + "logps/chosen": -786.075927734375, + "logps/rejected": -1397.09619140625, + "loss": 0.0853, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2606995105743408, + "rewards/margins": 0.2590792179107666, + "rewards/rejected": -0.5197787284851074, + "step": 870 + }, + { + "epoch": 0.17, + "learning_rate": 4.930682067880759e-06, + "logits/chosen": -1.7059816122055054, + "logits/rejected": -1.009918212890625, + "logps/chosen": -720.1533813476562, + "logps/rejected": -1230.998779296875, + "loss": 0.0719, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2683635354042053, + "rewards/margins": 0.2336823046207428, + "rewards/rejected": -0.5020458698272705, + "step": 880 + }, + { + "epoch": 0.17, + "learning_rate": 4.926741281631991e-06, + "logits/chosen": -1.4708434343338013, + "logits/rejected": -1.0346577167510986, + "logps/chosen": -668.7374267578125, + "logps/rejected": -1247.803955078125, + "loss": 0.106, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.25595933198928833, + "rewards/margins": 0.2600180506706238, + "rewards/rejected": -0.5159772634506226, + "step": 890 + }, + { + "epoch": 0.17, + "learning_rate": 4.922693215572695e-06, + "logits/chosen": -1.2816441059112549, + "logits/rejected": -0.8067164421081543, + "logps/chosen": -746.5218505859375, + "logps/rejected": -1298.2572021484375, + "loss": 0.1129, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2702776789665222, + "rewards/margins": 0.24418941140174866, + "rewards/rejected": -0.5144670605659485, + "step": 900 + }, + { + "epoch": 0.17, + "learning_rate": 4.91853804865716e-06, + "logits/chosen": -1.8930604457855225, + "logits/rejected": -0.9798396825790405, + "logps/chosen": -685.3873291015625, + "logps/rejected": -1278.208984375, + "loss": 0.0875, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.18094095587730408, + "rewards/margins": 0.2873077392578125, + "rewards/rejected": -0.4682486951351166, + "step": 910 + }, + { + "epoch": 0.18, + "learning_rate": 4.91427596457432e-06, + "logits/chosen": -1.7996612787246704, + "logits/rejected": -1.2704670429229736, + "logps/chosen": -548.4567260742188, + "logps/rejected": -1128.166259765625, + "loss": 0.1133, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.14571422338485718, + "rewards/margins": 0.24841785430908203, + "rewards/rejected": -0.39413201808929443, + "step": 920 + }, + { + "epoch": 0.18, + "learning_rate": 4.909907151739634e-06, + "logits/chosen": -1.5436075925827026, + "logits/rejected": -1.099595308303833, + "logps/chosen": -691.2551879882812, + "logps/rejected": -1293.091552734375, + "loss": 0.0535, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2075759917497635, + "rewards/margins": 0.2663406729698181, + "rewards/rejected": -0.4739166796207428, + "step": 930 + }, + { + "epoch": 0.18, + "learning_rate": 4.905431803286756e-06, + "logits/chosen": -1.9107078313827515, + "logits/rejected": -0.9111030697822571, + "logps/chosen": -640.4327392578125, + "logps/rejected": -1321.55859375, + "loss": 0.0529, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.1775406301021576, + "rewards/margins": 0.2937074601650238, + "rewards/rejected": -0.4712480902671814, + "step": 940 + }, + { + "epoch": 0.18, + "learning_rate": 4.900850117059e-06, + "logits/chosen": -1.5877889394760132, + "logits/rejected": -0.9842106103897095, + "logps/chosen": -651.833984375, + "logps/rejected": -1406.99755859375, + "loss": 0.0446, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.20326288044452667, + "rewards/margins": 0.3253769278526306, + "rewards/rejected": -0.5286397933959961, + "step": 950 + }, + { + "epoch": 0.18, + "learning_rate": 4.8961622956005895e-06, + "logits/chosen": -1.8441241979599, + "logits/rejected": -1.0815012454986572, + "logps/chosen": -683.8507690429688, + "logps/rejected": -1448.83251953125, + "loss": 0.0413, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.2018641233444214, + "rewards/margins": 0.36305880546569824, + "rewards/rejected": -0.5649229288101196, + "step": 960 + }, + { + "epoch": 0.18, + "learning_rate": 4.891368546147707e-06, + "logits/chosen": -1.7532212734222412, + "logits/rejected": -1.226049780845642, + "logps/chosen": -605.4937744140625, + "logps/rejected": -1278.6380615234375, + "loss": 0.0747, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.18908478319644928, + "rewards/margins": 0.3011362552642822, + "rewards/rejected": -0.4902211129665375, + "step": 970 + }, + { + "epoch": 0.19, + "learning_rate": 4.88646908061933e-06, + "logits/chosen": -1.7473132610321045, + "logits/rejected": -0.987618088722229, + "logps/chosen": -714.2844848632812, + "logps/rejected": -1421.032470703125, + "loss": 0.0738, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.24226772785186768, + "rewards/margins": 0.3205004930496216, + "rewards/rejected": -0.5627682209014893, + "step": 980 + }, + { + "epoch": 0.19, + "learning_rate": 4.881464115607866e-06, + "logits/chosen": -1.8514864444732666, + "logits/rejected": -1.102475643157959, + "logps/chosen": -690.0552368164062, + "logps/rejected": -1299.9759521484375, + "loss": 0.0803, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.22629126906394958, + "rewards/margins": 0.30736809968948364, + "rewards/rejected": -0.5336593985557556, + "step": 990 + }, + { + "epoch": 0.19, + "learning_rate": 4.876353872369573e-06, + "logits/chosen": -1.8886613845825195, + "logits/rejected": -1.1019657850265503, + "logps/chosen": -635.8572998046875, + "logps/rejected": -1302.39453125, + "loss": 0.063, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.17442987859249115, + "rewards/margins": 0.3282366394996643, + "rewards/rejected": -0.5026665329933167, + "step": 1000 + }, + { + "epoch": 0.19, + "learning_rate": 4.871138576814782e-06, + "logits/chosen": -1.7834192514419556, + "logits/rejected": -1.2944515943527222, + "logps/chosen": -820.8410034179688, + "logps/rejected": -1417.619873046875, + "loss": 0.0897, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2722592055797577, + "rewards/margins": 0.28066331148147583, + "rewards/rejected": -0.5529226064682007, + "step": 1010 + }, + { + "epoch": 0.19, + "learning_rate": 4.865818459497911e-06, + "logits/chosen": -1.4951821565628052, + "logits/rejected": -0.9517828822135925, + "logps/chosen": -659.3931884765625, + "logps/rejected": -1239.884033203125, + "loss": 0.0906, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.22253477573394775, + "rewards/margins": 0.25982776284217834, + "rewards/rejected": -0.4823625683784485, + "step": 1020 + }, + { + "epoch": 0.2, + "learning_rate": 4.860393755607266e-06, + "logits/chosen": -1.7842209339141846, + "logits/rejected": -1.2502198219299316, + "logps/chosen": -612.7080078125, + "logps/rejected": -1303.169189453125, + "loss": 0.0567, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.16437311470508575, + "rewards/margins": 0.3025432229042053, + "rewards/rejected": -0.46691638231277466, + "step": 1030 + }, + { + "epoch": 0.2, + "learning_rate": 4.854864704954654e-06, + "logits/chosen": -1.4791333675384521, + "logits/rejected": -0.8677660822868347, + "logps/chosen": -597.54296875, + "logps/rejected": -1209.46435546875, + "loss": 0.0787, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.18261802196502686, + "rewards/margins": 0.2735896706581116, + "rewards/rejected": -0.4562076926231384, + "step": 1040 + }, + { + "epoch": 0.2, + "learning_rate": 4.849231551964771e-06, + "logits/chosen": -1.5059902667999268, + "logits/rejected": -1.0160598754882812, + "logps/chosen": -761.7801513671875, + "logps/rejected": -1293.54248046875, + "loss": 0.0912, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.255204439163208, + "rewards/margins": 0.23546037077903748, + "rewards/rejected": -0.4906648099422455, + "step": 1050 + }, + { + "epoch": 0.2, + "learning_rate": 4.843494545664407e-06, + "logits/chosen": -1.6067050695419312, + "logits/rejected": -1.233272910118103, + "logps/chosen": -522.5372314453125, + "logps/rejected": -1160.08642578125, + "loss": 0.0894, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.14243915677070618, + "rewards/margins": 0.2808658480644226, + "rewards/rejected": -0.4233049750328064, + "step": 1060 + }, + { + "epoch": 0.2, + "learning_rate": 4.837653939671427e-06, + "logits/chosen": -1.5032033920288086, + "logits/rejected": -0.8692194819450378, + "logps/chosen": -603.5737915039062, + "logps/rejected": -1262.277099609375, + "loss": 0.0768, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.19397929310798645, + "rewards/margins": 0.2933715283870697, + "rewards/rejected": -0.48735085129737854, + "step": 1070 + }, + { + "epoch": 0.21, + "learning_rate": 4.8317099921835695e-06, + "logits/chosen": -1.7000210285186768, + "logits/rejected": -0.9161543846130371, + "logps/chosen": -673.3897705078125, + "logps/rejected": -1238.2972412109375, + "loss": 0.0756, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.18038219213485718, + "rewards/margins": 0.2853950262069702, + "rewards/rejected": -0.4657772183418274, + "step": 1080 + }, + { + "epoch": 0.21, + "learning_rate": 4.825662965967023e-06, + "logits/chosen": -1.2642765045166016, + "logits/rejected": -1.0610508918762207, + "logps/chosen": -568.6837768554688, + "logps/rejected": -1316.013427734375, + "loss": 0.086, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.1493438184261322, + "rewards/margins": 0.304331511259079, + "rewards/rejected": -0.4536752700805664, + "step": 1090 + }, + { + "epoch": 0.21, + "learning_rate": 4.819513128344814e-06, + "logits/chosen": -1.4977022409439087, + "logits/rejected": -0.9430916905403137, + "logps/chosen": -771.9939575195312, + "logps/rejected": -1390.864013671875, + "loss": 0.0744, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2511390447616577, + "rewards/margins": 0.27243170142173767, + "rewards/rejected": -0.5235707759857178, + "step": 1100 + }, + { + "epoch": 0.21, + "learning_rate": 4.813260751184992e-06, + "logits/chosen": -1.6140292882919312, + "logits/rejected": -1.1604433059692383, + "logps/chosen": -611.8510131835938, + "logps/rejected": -1271.919677734375, + "loss": 0.0816, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.23755709826946259, + "rewards/margins": 0.29791098833084106, + "rewards/rejected": -0.5354681015014648, + "step": 1110 + }, + { + "epoch": 0.21, + "learning_rate": 4.806906110888606e-06, + "logits/chosen": -1.57673180103302, + "logits/rejected": -0.8760968446731567, + "logps/chosen": -773.2528076171875, + "logps/rejected": -1487.0355224609375, + "loss": 0.0695, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.26283028721809387, + "rewards/margins": 0.34201639890670776, + "rewards/rejected": -0.604846715927124, + "step": 1120 + }, + { + "epoch": 0.22, + "learning_rate": 4.8004494883774885e-06, + "logits/chosen": -1.4768116474151611, + "logits/rejected": -1.0381088256835938, + "logps/chosen": -592.5392456054688, + "logps/rejected": -1219.542724609375, + "loss": 0.0907, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1403500735759735, + "rewards/margins": 0.27375248074531555, + "rewards/rejected": -0.41410255432128906, + "step": 1130 + }, + { + "epoch": 0.22, + "learning_rate": 4.793891169081835e-06, + "logits/chosen": -1.7134135961532593, + "logits/rejected": -1.066749095916748, + "logps/chosen": -520.9646606445312, + "logps/rejected": -1094.593994140625, + "loss": 0.1118, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.056602220982313156, + "rewards/margins": 0.25242939591407776, + "rewards/rejected": -0.30903160572052, + "step": 1140 + }, + { + "epoch": 0.22, + "learning_rate": 4.787231442927587e-06, + "logits/chosen": -1.4377086162567139, + "logits/rejected": -0.8465448617935181, + "logps/chosen": -630.1041259765625, + "logps/rejected": -1104.6734619140625, + "loss": 0.094, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.15882766246795654, + "rewards/margins": 0.21694330871105194, + "rewards/rejected": -0.3757709264755249, + "step": 1150 + }, + { + "epoch": 0.22, + "learning_rate": 4.780470604323616e-06, + "logits/chosen": -1.6286237239837646, + "logits/rejected": -0.8310405611991882, + "logps/chosen": -762.2435302734375, + "logps/rejected": -1267.188232421875, + "loss": 0.0882, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.2872321307659149, + "rewards/margins": 0.22836999595165253, + "rewards/rejected": -0.5156021118164062, + "step": 1160 + }, + { + "epoch": 0.22, + "learning_rate": 4.773608952148706e-06, + "logits/chosen": -1.4241678714752197, + "logits/rejected": -1.0249742269515991, + "logps/chosen": -659.6388549804688, + "logps/rejected": -1119.01904296875, + "loss": 0.1083, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1795882135629654, + "rewards/margins": 0.19241158664226532, + "rewards/rejected": -0.3719998002052307, + "step": 1170 + }, + { + "epoch": 0.22, + "learning_rate": 4.766646789738342e-06, + "logits/chosen": -1.4974104166030884, + "logits/rejected": -1.0342199802398682, + "logps/chosen": -511.42803955078125, + "logps/rejected": -1142.3897705078125, + "loss": 0.0633, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.128463476896286, + "rewards/margins": 0.25377964973449707, + "rewards/rejected": -0.3822430968284607, + "step": 1180 + }, + { + "epoch": 0.23, + "learning_rate": 4.759584424871302e-06, + "logits/chosen": -1.4972057342529297, + "logits/rejected": -1.0252101421356201, + "logps/chosen": -576.6381225585938, + "logps/rejected": -1152.332763671875, + "loss": 0.1047, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.16848218441009521, + "rewards/margins": 0.2421194612979889, + "rewards/rejected": -0.4106016755104065, + "step": 1190 + }, + { + "epoch": 0.23, + "learning_rate": 4.752422169756048e-06, + "logits/chosen": -1.483394980430603, + "logits/rejected": -1.089908242225647, + "logps/chosen": -659.2501831054688, + "logps/rejected": -1330.499267578125, + "loss": 0.0568, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.18682973086833954, + "rewards/margins": 0.303545743227005, + "rewards/rejected": -0.49037545919418335, + "step": 1200 + }, + { + "epoch": 0.23, + "learning_rate": 4.745160341016927e-06, + "logits/chosen": -1.725095510482788, + "logits/rejected": -1.0486876964569092, + "logps/chosen": -745.1365356445312, + "logps/rejected": -1397.880859375, + "loss": 0.0587, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.23421664535999298, + "rewards/margins": 0.3120633661746979, + "rewards/rejected": -0.546280026435852, + "step": 1210 + }, + { + "epoch": 0.23, + "learning_rate": 4.737799259680172e-06, + "logits/chosen": -1.8329044580459595, + "logits/rejected": -1.0362406969070435, + "logps/chosen": -665.9381103515625, + "logps/rejected": -1365.664306640625, + "loss": 0.0595, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.1887119710445404, + "rewards/margins": 0.34067028760910034, + "rewards/rejected": -0.5293822288513184, + "step": 1220 + }, + { + "epoch": 0.23, + "learning_rate": 4.730339251159709e-06, + "logits/chosen": -1.4984453916549683, + "logits/rejected": -0.9244080781936646, + "logps/chosen": -627.0536499023438, + "logps/rejected": -1200.630615234375, + "loss": 0.076, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.16129754483699799, + "rewards/margins": 0.27342361211776733, + "rewards/rejected": -0.4347211718559265, + "step": 1230 + }, + { + "epoch": 0.24, + "learning_rate": 4.722780645242775e-06, + "logits/chosen": -1.8025153875350952, + "logits/rejected": -1.0183075666427612, + "logps/chosen": -678.0471801757812, + "logps/rejected": -1255.756591796875, + "loss": 0.073, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.18587812781333923, + "rewards/margins": 0.2857830822467804, + "rewards/rejected": -0.47166118025779724, + "step": 1240 + }, + { + "epoch": 0.24, + "learning_rate": 4.715123776075337e-06, + "logits/chosen": -1.7555253505706787, + "logits/rejected": -1.0257951021194458, + "logps/chosen": -708.4110107421875, + "logps/rejected": -1359.506103515625, + "loss": 0.0791, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2188255488872528, + "rewards/margins": 0.30667853355407715, + "rewards/rejected": -0.5255040526390076, + "step": 1250 + }, + { + "epoch": 0.24, + "learning_rate": 4.707368982147318e-06, + "logits/chosen": -1.6728156805038452, + "logits/rejected": -0.9375909566879272, + "logps/chosen": -703.1410522460938, + "logps/rejected": -1263.3475341796875, + "loss": 0.0895, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.21338506042957306, + "rewards/margins": 0.2970745265483856, + "rewards/rejected": -0.5104595422744751, + "step": 1260 + }, + { + "epoch": 0.24, + "learning_rate": 4.699516606277638e-06, + "logits/chosen": -1.8740787506103516, + "logits/rejected": -1.362985610961914, + "logps/chosen": -728.4583129882812, + "logps/rejected": -1369.810302734375, + "loss": 0.0746, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.23265771567821503, + "rewards/margins": 0.2759070098400116, + "rewards/rejected": -0.5085647702217102, + "step": 1270 + }, + { + "epoch": 0.24, + "learning_rate": 4.691566995599056e-06, + "logits/chosen": -1.7465486526489258, + "logits/rejected": -0.9074887037277222, + "logps/chosen": -530.6092529296875, + "logps/rejected": -1085.4388427734375, + "loss": 0.074, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.1201125830411911, + "rewards/margins": 0.26156720519065857, + "rewards/rejected": -0.38167980313301086, + "step": 1280 + }, + { + "epoch": 0.25, + "learning_rate": 4.683520501542825e-06, + "logits/chosen": -1.7438312768936157, + "logits/rejected": -1.0178749561309814, + "logps/chosen": -585.673583984375, + "logps/rejected": -1169.25732421875, + "loss": 0.0714, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.14680668711662292, + "rewards/margins": 0.25362518429756165, + "rewards/rejected": -0.40043187141418457, + "step": 1290 + }, + { + "epoch": 0.25, + "learning_rate": 4.675377479823153e-06, + "logits/chosen": -1.4712369441986084, + "logits/rejected": -1.0751581192016602, + "logps/chosen": -608.4730834960938, + "logps/rejected": -1213.798095703125, + "loss": 0.0928, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.16733074188232422, + "rewards/margins": 0.2824379503726959, + "rewards/rejected": -0.44976872205734253, + "step": 1300 + }, + { + "epoch": 0.25, + "learning_rate": 4.667138290421483e-06, + "logits/chosen": -1.4218109846115112, + "logits/rejected": -1.0891635417938232, + "logps/chosen": -546.42236328125, + "logps/rejected": -1120.02734375, + "loss": 0.0776, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.17138846218585968, + "rewards/margins": 0.25035151839256287, + "rewards/rejected": -0.42173999547958374, + "step": 1310 + }, + { + "epoch": 0.25, + "learning_rate": 4.658803297570578e-06, + "logits/chosen": -1.5527435541152954, + "logits/rejected": -0.772042989730835, + "logps/chosen": -707.126708984375, + "logps/rejected": -1424.598876953125, + "loss": 0.0438, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.19388630986213684, + "rewards/margins": 0.3253108859062195, + "rewards/rejected": -0.5191971063613892, + "step": 1320 + }, + { + "epoch": 0.25, + "learning_rate": 4.650372869738415e-06, + "logits/chosen": -1.4559705257415771, + "logits/rejected": -0.9750394821166992, + "logps/chosen": -564.2633056640625, + "logps/rejected": -1231.0230712890625, + "loss": 0.0747, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16324150562286377, + "rewards/margins": 0.28780093789100647, + "rewards/rejected": -0.45104241371154785, + "step": 1330 + }, + { + "epoch": 0.26, + "learning_rate": 4.641847379611898e-06, + "logits/chosen": -1.6298139095306396, + "logits/rejected": -0.9655359387397766, + "logps/chosen": -630.3314208984375, + "logps/rejected": -1392.5352783203125, + "loss": 0.0764, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.19043368101119995, + "rewards/margins": 0.3249918818473816, + "rewards/rejected": -0.5154255628585815, + "step": 1340 + }, + { + "epoch": 0.26, + "learning_rate": 4.633227204080389e-06, + "logits/chosen": -1.6190248727798462, + "logits/rejected": -1.1195992231369019, + "logps/chosen": -551.1062622070312, + "logps/rejected": -1251.203857421875, + "loss": 0.0634, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.17107483744621277, + "rewards/margins": 0.2999062240123749, + "rewards/rejected": -0.47098103165626526, + "step": 1350 + }, + { + "epoch": 0.26, + "learning_rate": 4.624512724219038e-06, + "logits/chosen": -1.4386488199234009, + "logits/rejected": -0.9899671673774719, + "logps/chosen": -709.4961547851562, + "logps/rejected": -1226.273681640625, + "loss": 0.1151, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2317754030227661, + "rewards/margins": 0.25371408462524414, + "rewards/rejected": -0.48548945784568787, + "step": 1360 + }, + { + "epoch": 0.26, + "learning_rate": 4.6157043252719374e-06, + "logits/chosen": -1.7422698736190796, + "logits/rejected": -1.1107302904129028, + "logps/chosen": -825.3232421875, + "logps/rejected": -1382.18603515625, + "loss": 0.0723, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.27356773614883423, + "rewards/margins": 0.28873828053474426, + "rewards/rejected": -0.5623060464859009, + "step": 1370 + }, + { + "epoch": 0.26, + "learning_rate": 4.606802396635098e-06, + "logits/chosen": -1.6875635385513306, + "logits/rejected": -1.162154197692871, + "logps/chosen": -702.98095703125, + "logps/rejected": -1256.966796875, + "loss": 0.079, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.22531962394714355, + "rewards/margins": 0.25439611077308655, + "rewards/rejected": -0.4797157347202301, + "step": 1380 + }, + { + "epoch": 0.26, + "learning_rate": 4.597807331839229e-06, + "logits/chosen": -1.7705532312393188, + "logits/rejected": -0.9259660840034485, + "logps/chosen": -735.7838745117188, + "logps/rejected": -1272.888427734375, + "loss": 0.0651, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.19407150149345398, + "rewards/margins": 0.2555926740169525, + "rewards/rejected": -0.4496641755104065, + "step": 1390 + }, + { + "epoch": 0.27, + "learning_rate": 4.588719528532342e-06, + "logits/chosen": -1.7049188613891602, + "logits/rejected": -1.1274850368499756, + "logps/chosen": -646.8198852539062, + "logps/rejected": -1386.414306640625, + "loss": 0.053, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.15076547861099243, + "rewards/margins": 0.32411348819732666, + "rewards/rejected": -0.4748789668083191, + "step": 1400 + }, + { + "epoch": 0.27, + "learning_rate": 4.5795393884621735e-06, + "logits/chosen": -1.9536349773406982, + "logits/rejected": -1.1568233966827393, + "logps/chosen": -591.2590942382812, + "logps/rejected": -1258.59423828125, + "loss": 0.0747, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10888688266277313, + "rewards/margins": 0.2812551259994507, + "rewards/rejected": -0.390142023563385, + "step": 1410 + }, + { + "epoch": 0.27, + "learning_rate": 4.5702673174584236e-06, + "logits/chosen": -1.6346734762191772, + "logits/rejected": -1.1741018295288086, + "logps/chosen": -746.7779541015625, + "logps/rejected": -1454.915771484375, + "loss": 0.0742, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2458966076374054, + "rewards/margins": 0.2765346169471741, + "rewards/rejected": -0.5224311947822571, + "step": 1420 + }, + { + "epoch": 0.27, + "learning_rate": 4.560903725414816e-06, + "logits/chosen": -1.743407964706421, + "logits/rejected": -0.8613992929458618, + "logps/chosen": -671.2027587890625, + "logps/rejected": -1287.640380859375, + "loss": 0.0842, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.20365352928638458, + "rewards/margins": 0.26316341757774353, + "rewards/rejected": -0.4668169617652893, + "step": 1430 + }, + { + "epoch": 0.27, + "learning_rate": 4.551449026270979e-06, + "logits/chosen": -1.7058141231536865, + "logits/rejected": -1.1486746072769165, + "logps/chosen": -598.8638916015625, + "logps/rejected": -1300.490234375, + "loss": 0.0781, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.19492121040821075, + "rewards/margins": 0.283249169588089, + "rewards/rejected": -0.47817039489746094, + "step": 1440 + }, + { + "epoch": 0.28, + "learning_rate": 4.541903637994142e-06, + "logits/chosen": -1.6145961284637451, + "logits/rejected": -1.0189629793167114, + "logps/chosen": -635.6578369140625, + "logps/rejected": -1167.5048828125, + "loss": 0.0972, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.18294695019721985, + "rewards/margins": 0.23105594515800476, + "rewards/rejected": -0.4140028953552246, + "step": 1450 + }, + { + "epoch": 0.28, + "learning_rate": 4.532267982560662e-06, + "logits/chosen": -1.6065614223480225, + "logits/rejected": -1.3095319271087646, + "logps/chosen": -658.8798828125, + "logps/rejected": -1332.044921875, + "loss": 0.0866, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.20158949494361877, + "rewards/margins": 0.29118824005126953, + "rewards/rejected": -0.49277767539024353, + "step": 1460 + }, + { + "epoch": 0.28, + "learning_rate": 4.522542485937369e-06, + "logits/chosen": -1.469521403312683, + "logits/rejected": -0.9519163966178894, + "logps/chosen": -594.472412109375, + "logps/rejected": -1056.48193359375, + "loss": 0.1178, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16802926361560822, + "rewards/margins": 0.1843966841697693, + "rewards/rejected": -0.3524259328842163, + "step": 1470 + }, + { + "epoch": 0.28, + "learning_rate": 4.512727578062733e-06, + "logits/chosen": -1.5861170291900635, + "logits/rejected": -0.9400532841682434, + "logps/chosen": -671.851318359375, + "logps/rejected": -1219.797119140625, + "loss": 0.0934, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1471502184867859, + "rewards/margins": 0.26563459634780884, + "rewards/rejected": -0.41278475522994995, + "step": 1480 + }, + { + "epoch": 0.28, + "learning_rate": 4.502823692827859e-06, + "logits/chosen": -1.5500032901763916, + "logits/rejected": -1.1829593181610107, + "logps/chosen": -658.6322021484375, + "logps/rejected": -1304.2266845703125, + "loss": 0.077, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.18946467339992523, + "rewards/margins": 0.2668909430503845, + "rewards/rejected": -0.45635563135147095, + "step": 1490 + }, + { + "epoch": 0.29, + "learning_rate": 4.492831268057307e-06, + "logits/chosen": -1.625832200050354, + "logits/rejected": -1.0373882055282593, + "logps/chosen": -696.6791381835938, + "logps/rejected": -1352.232177734375, + "loss": 0.0882, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1951400637626648, + "rewards/margins": 0.277163565158844, + "rewards/rejected": -0.472303569316864, + "step": 1500 + }, + { + "epoch": 0.29, + "learning_rate": 4.482750745489733e-06, + "logits/chosen": -1.787223219871521, + "logits/rejected": -1.222612977027893, + "logps/chosen": -550.8800659179688, + "logps/rejected": -1146.3341064453125, + "loss": 0.0782, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.10685940086841583, + "rewards/margins": 0.2857755422592163, + "rewards/rejected": -0.39263495802879333, + "step": 1510 + }, + { + "epoch": 0.29, + "learning_rate": 4.472582570758367e-06, + "logits/chosen": -1.728491187095642, + "logits/rejected": -1.01820969581604, + "logps/chosen": -593.0372314453125, + "logps/rejected": -1177.662353515625, + "loss": 0.0638, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.16049079596996307, + "rewards/margins": 0.2860396206378937, + "rewards/rejected": -0.44653043150901794, + "step": 1520 + }, + { + "epoch": 0.29, + "learning_rate": 4.4623271933713065e-06, + "logits/chosen": -1.6648460626602173, + "logits/rejected": -1.033857822418213, + "logps/chosen": -671.03955078125, + "logps/rejected": -1315.7705078125, + "loss": 0.0945, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.22596442699432373, + "rewards/margins": 0.28475135564804077, + "rewards/rejected": -0.5107157826423645, + "step": 1530 + }, + { + "epoch": 0.29, + "learning_rate": 4.451985066691649e-06, + "logits/chosen": -1.670251488685608, + "logits/rejected": -1.3727920055389404, + "logps/chosen": -600.6444091796875, + "logps/rejected": -1437.553466796875, + "loss": 0.0697, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.20430073142051697, + "rewards/margins": 0.3402923047542572, + "rewards/rejected": -0.5445930361747742, + "step": 1540 + }, + { + "epoch": 0.3, + "learning_rate": 4.441556647917447e-06, + "logits/chosen": -1.5559931993484497, + "logits/rejected": -0.9859651327133179, + "logps/chosen": -561.6361694335938, + "logps/rejected": -1220.6103515625, + "loss": 0.0743, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.13392092287540436, + "rewards/margins": 0.2867361903190613, + "rewards/rejected": -0.42065709829330444, + "step": 1550 + }, + { + "epoch": 0.3, + "learning_rate": 4.431042398061499e-06, + "logits/chosen": -1.6274524927139282, + "logits/rejected": -1.1113866567611694, + "logps/chosen": -502.992919921875, + "logps/rejected": -1168.184814453125, + "loss": 0.0575, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12179882824420929, + "rewards/margins": 0.3009914457798004, + "rewards/rejected": -0.4227902889251709, + "step": 1560 + }, + { + "epoch": 0.3, + "learning_rate": 4.420442781930971e-06, + "logits/chosen": -1.6948182582855225, + "logits/rejected": -1.1259328126907349, + "logps/chosen": -610.978515625, + "logps/rejected": -1261.6180419921875, + "loss": 0.0832, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.14310654997825623, + "rewards/margins": 0.2861831784248352, + "rewards/rejected": -0.42928972840309143, + "step": 1570 + }, + { + "epoch": 0.3, + "learning_rate": 4.409758268106842e-06, + "logits/chosen": -1.746582269668579, + "logits/rejected": -0.878632664680481, + "logps/chosen": -616.5509033203125, + "logps/rejected": -1264.705810546875, + "loss": 0.0566, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.17718395590782166, + "rewards/margins": 0.28132373094558716, + "rewards/rejected": -0.4585076868534088, + "step": 1580 + }, + { + "epoch": 0.3, + "learning_rate": 4.398989328923196e-06, + "logits/chosen": -1.4909377098083496, + "logits/rejected": -1.01114022731781, + "logps/chosen": -630.6038818359375, + "logps/rejected": -1251.208740234375, + "loss": 0.0856, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.20834538340568542, + "rewards/margins": 0.24381554126739502, + "rewards/rejected": -0.45216089487075806, + "step": 1590 + }, + { + "epoch": 0.3, + "learning_rate": 4.388136440446338e-06, + "logits/chosen": -1.6888008117675781, + "logits/rejected": -0.99406498670578, + "logps/chosen": -651.6956787109375, + "logps/rejected": -1196.780029296875, + "loss": 0.0897, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.18217213451862335, + "rewards/margins": 0.24324896931648254, + "rewards/rejected": -0.4254210889339447, + "step": 1600 + }, + { + "epoch": 0.31, + "learning_rate": 4.377200082453748e-06, + "logits/chosen": -1.49949312210083, + "logits/rejected": -0.9790937304496765, + "logps/chosen": -680.9384765625, + "logps/rejected": -1257.7886962890625, + "loss": 0.0735, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.21910205483436584, + "rewards/margins": 0.2613365054130554, + "rewards/rejected": -0.48043856024742126, + "step": 1610 + }, + { + "epoch": 0.31, + "learning_rate": 4.366180738412876e-06, + "logits/chosen": -1.6257928609848022, + "logits/rejected": -0.9221351742744446, + "logps/chosen": -675.370849609375, + "logps/rejected": -1347.3052978515625, + "loss": 0.062, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.21879585087299347, + "rewards/margins": 0.3183351159095764, + "rewards/rejected": -0.5371309518814087, + "step": 1620 + }, + { + "epoch": 0.31, + "learning_rate": 4.355078895459761e-06, + "logits/chosen": -1.6502177715301514, + "logits/rejected": -1.099336862564087, + "logps/chosen": -707.7600708007812, + "logps/rejected": -1389.7249755859375, + "loss": 0.0547, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2330164611339569, + "rewards/margins": 0.3357909321784973, + "rewards/rejected": -0.5688074231147766, + "step": 1630 + }, + { + "epoch": 0.31, + "learning_rate": 4.343895044377504e-06, + "logits/chosen": -1.718955397605896, + "logits/rejected": -0.9656432867050171, + "logps/chosen": -754.4881591796875, + "logps/rejected": -1390.9036865234375, + "loss": 0.0546, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.255190908908844, + "rewards/margins": 0.3234071433544159, + "rewards/rejected": -0.5785980224609375, + "step": 1640 + }, + { + "epoch": 0.31, + "learning_rate": 4.332629679574566e-06, + "logits/chosen": -1.55838143825531, + "logits/rejected": -0.9745651483535767, + "logps/chosen": -663.7210083007812, + "logps/rejected": -1346.267578125, + "loss": 0.0885, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.23024721443653107, + "rewards/margins": 0.3076723515987396, + "rewards/rejected": -0.5379196405410767, + "step": 1650 + }, + { + "epoch": 0.32, + "learning_rate": 4.321283299062916e-06, + "logits/chosen": -1.5372596979141235, + "logits/rejected": -0.9359537959098816, + "logps/chosen": -734.9241943359375, + "logps/rejected": -1340.1929931640625, + "loss": 0.1026, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.28472191095352173, + "rewards/margins": 0.2690187096595764, + "rewards/rejected": -0.5537406802177429, + "step": 1660 + }, + { + "epoch": 0.32, + "learning_rate": 4.309856404436013e-06, + "logits/chosen": -1.6407476663589478, + "logits/rejected": -0.9992292523384094, + "logps/chosen": -695.8706665039062, + "logps/rejected": -1365.452392578125, + "loss": 0.0707, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2776133120059967, + "rewards/margins": 0.31161263585090637, + "rewards/rejected": -0.5892259478569031, + "step": 1670 + }, + { + "epoch": 0.32, + "learning_rate": 4.2983495008466285e-06, + "logits/chosen": -1.4128013849258423, + "logits/rejected": -1.0901668071746826, + "logps/chosen": -641.1650390625, + "logps/rejected": -1331.385986328125, + "loss": 0.0782, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.25103798508644104, + "rewards/margins": 0.30623993277549744, + "rewards/rejected": -0.5572779774665833, + "step": 1680 + }, + { + "epoch": 0.32, + "learning_rate": 4.2867630969845235e-06, + "logits/chosen": -1.4749224185943604, + "logits/rejected": -0.9041854739189148, + "logps/chosen": -793.1533203125, + "logps/rejected": -1348.232177734375, + "loss": 0.0763, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.30572837591171265, + "rewards/margins": 0.26624101400375366, + "rewards/rejected": -0.5719693303108215, + "step": 1690 + }, + { + "epoch": 0.32, + "learning_rate": 4.275097705053951e-06, + "logits/chosen": -1.478694200515747, + "logits/rejected": -0.8133836984634399, + "logps/chosen": -868.2639770507812, + "logps/rejected": -1334.706787109375, + "loss": 0.0688, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.3474667966365814, + "rewards/margins": 0.25684088468551636, + "rewards/rejected": -0.6043076515197754, + "step": 1700 + }, + { + "epoch": 0.33, + "learning_rate": 4.263353840751023e-06, + "logits/chosen": -1.2793939113616943, + "logits/rejected": -0.7485678195953369, + "logps/chosen": -711.3675537109375, + "logps/rejected": -1447.356201171875, + "loss": 0.0921, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.257205069065094, + "rewards/margins": 0.3200908303260803, + "rewards/rejected": -0.5772958397865295, + "step": 1710 + }, + { + "epoch": 0.33, + "learning_rate": 4.251532023240901e-06, + "logits/chosen": -1.4069713354110718, + "logits/rejected": -0.8339581489562988, + "logps/chosen": -700.1744384765625, + "logps/rejected": -1356.793701171875, + "loss": 0.0729, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.22528398036956787, + "rewards/margins": 0.28295961022377014, + "rewards/rejected": -0.5082435607910156, + "step": 1720 + }, + { + "epoch": 0.33, + "learning_rate": 4.239632775134857e-06, + "logits/chosen": -1.3647840023040771, + "logits/rejected": -0.9706377983093262, + "logps/chosen": -675.4508056640625, + "logps/rejected": -1254.943603515625, + "loss": 0.0813, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.20136883854866028, + "rewards/margins": 0.24962525069713593, + "rewards/rejected": -0.4509941041469574, + "step": 1730 + }, + { + "epoch": 0.33, + "learning_rate": 4.227656622467162e-06, + "logits/chosen": -1.5252206325531006, + "logits/rejected": -1.0396662950515747, + "logps/chosen": -519.870849609375, + "logps/rejected": -1278.7230224609375, + "loss": 0.054, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.11870086193084717, + "rewards/margins": 0.32463350892066956, + "rewards/rejected": -0.4433344006538391, + "step": 1740 + }, + { + "epoch": 0.33, + "learning_rate": 4.215604094671835e-06, + "logits/chosen": -1.6197888851165771, + "logits/rejected": -1.015608787536621, + "logps/chosen": -514.943359375, + "logps/rejected": -1060.022216796875, + "loss": 0.0818, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.12082231044769287, + "rewards/margins": 0.24318580329418182, + "rewards/rejected": -0.3640081286430359, + "step": 1750 + }, + { + "epoch": 0.34, + "learning_rate": 4.203475724559235e-06, + "logits/chosen": -1.5419548749923706, + "logits/rejected": -1.1807676553726196, + "logps/chosen": -548.9449462890625, + "logps/rejected": -1381.0179443359375, + "loss": 0.0432, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.17818590998649597, + "rewards/margins": 0.34219759702682495, + "rewards/rejected": -0.5203834772109985, + "step": 1760 + }, + { + "epoch": 0.34, + "learning_rate": 4.191272048292514e-06, + "logits/chosen": -1.5359946489334106, + "logits/rejected": -1.1761115789413452, + "logps/chosen": -714.53466796875, + "logps/rejected": -1336.4539794921875, + "loss": 0.0709, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22599129378795624, + "rewards/margins": 0.2689371705055237, + "rewards/rejected": -0.4949284493923187, + "step": 1770 + }, + { + "epoch": 0.34, + "learning_rate": 4.178993605363904e-06, + "logits/chosen": -1.7051982879638672, + "logits/rejected": -1.004176378250122, + "logps/chosen": -622.5487670898438, + "logps/rejected": -1316.314208984375, + "loss": 0.0553, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.15982475876808167, + "rewards/margins": 0.31387537717819214, + "rewards/rejected": -0.4737001061439514, + "step": 1780 + }, + { + "epoch": 0.34, + "learning_rate": 4.166640938570879e-06, + "logits/chosen": -1.5779173374176025, + "logits/rejected": -1.2296186685562134, + "logps/chosen": -609.0247802734375, + "logps/rejected": -1203.373046875, + "loss": 0.0864, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.19219158589839935, + "rewards/margins": 0.26055121421813965, + "rewards/rejected": -0.4527428150177002, + "step": 1790 + }, + { + "epoch": 0.34, + "learning_rate": 4.154214593992149e-06, + "logits/chosen": -1.8979780673980713, + "logits/rejected": -0.9662348628044128, + "logps/chosen": -790.3106079101562, + "logps/rejected": -1465.6546630859375, + "loss": 0.0573, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2489619255065918, + "rewards/margins": 0.33502355217933655, + "rewards/rejected": -0.583985447883606, + "step": 1800 + }, + { + "epoch": 0.34, + "learning_rate": 4.1417151209635265e-06, + "logits/chosen": -1.5197267532348633, + "logits/rejected": -1.0553096532821655, + "logps/chosen": -655.1430053710938, + "logps/rejected": -1308.0142822265625, + "loss": 0.0688, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2508172392845154, + "rewards/margins": 0.29503312706947327, + "rewards/rejected": -0.5458503365516663, + "step": 1810 + }, + { + "epoch": 0.35, + "learning_rate": 4.129143072053639e-06, + "logits/chosen": -1.7092673778533936, + "logits/rejected": -1.0430828332901, + "logps/chosen": -774.0709228515625, + "logps/rejected": -1407.5379638671875, + "loss": 0.0717, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.259227991104126, + "rewards/margins": 0.3105766773223877, + "rewards/rejected": -0.5698047280311584, + "step": 1820 + }, + { + "epoch": 0.35, + "learning_rate": 4.116499003039499e-06, + "logits/chosen": -1.3776369094848633, + "logits/rejected": -0.7794798612594604, + "logps/chosen": -695.4063720703125, + "logps/rejected": -1304.434814453125, + "loss": 0.064, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.22463175654411316, + "rewards/margins": 0.31490933895111084, + "rewards/rejected": -0.5395411252975464, + "step": 1830 + }, + { + "epoch": 0.35, + "learning_rate": 4.103783472881942e-06, + "logits/chosen": -1.6217248439788818, + "logits/rejected": -0.9625973701477051, + "logps/chosen": -617.6864624023438, + "logps/rejected": -1318.3341064453125, + "loss": 0.0532, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16038154065608978, + "rewards/margins": 0.3341715931892395, + "rewards/rejected": -0.49455317854881287, + "step": 1840 + }, + { + "epoch": 0.35, + "learning_rate": 4.0909970437009094e-06, + "logits/chosen": -1.7111873626708984, + "logits/rejected": -1.0022821426391602, + "logps/chosen": -752.5694580078125, + "logps/rejected": -1340.786376953125, + "loss": 0.0797, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.19375209510326385, + "rewards/margins": 0.29098087549209595, + "rewards/rejected": -0.4847329556941986, + "step": 1850 + }, + { + "epoch": 0.35, + "learning_rate": 4.078140280750598e-06, + "logits/chosen": -1.473215103149414, + "logits/rejected": -1.1144354343414307, + "logps/chosen": -651.291015625, + "logps/rejected": -1229.4078369140625, + "loss": 0.0944, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.17073918879032135, + "rewards/margins": 0.27028197050094604, + "rewards/rejected": -0.4410211145877838, + "step": 1860 + }, + { + "epoch": 0.36, + "learning_rate": 4.065213752394478e-06, + "logits/chosen": -1.4655654430389404, + "logits/rejected": -0.9170185327529907, + "logps/chosen": -611.453857421875, + "logps/rejected": -1353.949462890625, + "loss": 0.0492, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.16493529081344604, + "rewards/margins": 0.3255433142185211, + "rewards/rejected": -0.49047860503196716, + "step": 1870 + }, + { + "epoch": 0.36, + "learning_rate": 4.052218030080162e-06, + "logits/chosen": -1.6494277715682983, + "logits/rejected": -0.9448171854019165, + "logps/chosen": -615.288330078125, + "logps/rejected": -1280.2767333984375, + "loss": 0.0619, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.170337975025177, + "rewards/margins": 0.3003271818161011, + "rewards/rejected": -0.4706651568412781, + "step": 1880 + }, + { + "epoch": 0.36, + "learning_rate": 4.039153688314146e-06, + "logits/chosen": -1.4522556066513062, + "logits/rejected": -0.8698140978813171, + "logps/chosen": -699.1949462890625, + "logps/rejected": -1415.995849609375, + "loss": 0.0806, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.21898791193962097, + "rewards/margins": 0.3242533802986145, + "rewards/rejected": -0.5432413220405579, + "step": 1890 + }, + { + "epoch": 0.36, + "learning_rate": 4.026021304636408e-06, + "logits/chosen": -1.6501353979110718, + "logits/rejected": -0.9669955372810364, + "logps/chosen": -650.6583251953125, + "logps/rejected": -1219.7510986328125, + "loss": 0.0777, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.1721065789461136, + "rewards/margins": 0.31094521284103394, + "rewards/rejected": -0.4830518364906311, + "step": 1900 + }, + { + "epoch": 0.36, + "learning_rate": 4.012821459594881e-06, + "logits/chosen": -1.8619811534881592, + "logits/rejected": -1.1319575309753418, + "logps/chosen": -692.3233642578125, + "logps/rejected": -1297.4775390625, + "loss": 0.1147, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1922949254512787, + "rewards/margins": 0.2827715277671814, + "rewards/rejected": -0.47506648302078247, + "step": 1910 + }, + { + "epoch": 0.37, + "learning_rate": 3.999554736719785e-06, + "logits/chosen": -1.6963351964950562, + "logits/rejected": -1.0642411708831787, + "logps/chosen": -529.7073364257812, + "logps/rejected": -1265.6956787109375, + "loss": 0.0572, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10542023181915283, + "rewards/margins": 0.31158146262168884, + "rewards/rejected": -0.4170016646385193, + "step": 1920 + }, + { + "epoch": 0.37, + "learning_rate": 3.986221722497832e-06, + "logits/chosen": -1.8552402257919312, + "logits/rejected": -1.0955214500427246, + "logps/chosen": -552.8602294921875, + "logps/rejected": -1160.6795654296875, + "loss": 0.0736, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.0734359547495842, + "rewards/margins": 0.3090042471885681, + "rewards/rejected": -0.3824402391910553, + "step": 1930 + }, + { + "epoch": 0.37, + "learning_rate": 3.9728230063463e-06, + "logits/chosen": -1.6904840469360352, + "logits/rejected": -0.9247332811355591, + "logps/chosen": -652.7967529296875, + "logps/rejected": -1129.207763671875, + "loss": 0.0962, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14699819684028625, + "rewards/margins": 0.23404502868652344, + "rewards/rejected": -0.3810432553291321, + "step": 1940 + }, + { + "epoch": 0.37, + "learning_rate": 3.9593591805869755e-06, + "logits/chosen": -1.4540979862213135, + "logits/rejected": -1.1804428100585938, + "logps/chosen": -464.2632751464844, + "logps/rejected": -1101.6966552734375, + "loss": 0.1024, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11493609845638275, + "rewards/margins": 0.2585905194282532, + "rewards/rejected": -0.3735266327857971, + "step": 1950 + }, + { + "epoch": 0.37, + "learning_rate": 3.945830840419966e-06, + "logits/chosen": -1.5080819129943848, + "logits/rejected": -1.2832539081573486, + "logps/chosen": -410.7259216308594, + "logps/rejected": -1009.2428588867188, + "loss": 0.0981, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.12702463567256927, + "rewards/margins": 0.23238396644592285, + "rewards/rejected": -0.35940855741500854, + "step": 1960 + }, + { + "epoch": 0.38, + "learning_rate": 3.932238583897395e-06, + "logits/chosen": -1.5733039379119873, + "logits/rejected": -1.2267423868179321, + "logps/chosen": -605.8873901367188, + "logps/rejected": -1320.41650390625, + "loss": 0.0694, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.17582616209983826, + "rewards/margins": 0.3171050250530243, + "rewards/rejected": -0.49293118715286255, + "step": 1970 + }, + { + "epoch": 0.38, + "learning_rate": 3.918583011896955e-06, + "logits/chosen": -1.406954050064087, + "logits/rejected": -1.0951248407363892, + "logps/chosen": -575.7022705078125, + "logps/rejected": -1197.1514892578125, + "loss": 0.0894, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.20731861889362335, + "rewards/margins": 0.25613903999328613, + "rewards/rejected": -0.46345773339271545, + "step": 1980 + }, + { + "epoch": 0.38, + "learning_rate": 3.904864728095349e-06, + "logits/chosen": -1.706703543663025, + "logits/rejected": -1.1287411451339722, + "logps/chosen": -643.6716918945312, + "logps/rejected": -1208.3297119140625, + "loss": 0.0859, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.16700610518455505, + "rewards/margins": 0.2838577926158905, + "rewards/rejected": -0.4508638381958008, + "step": 1990 + }, + { + "epoch": 0.38, + "learning_rate": 3.891084338941603e-06, + "logits/chosen": -1.658524751663208, + "logits/rejected": -0.8833999633789062, + "logps/chosen": -667.8445434570312, + "logps/rejected": -1186.5399169921875, + "loss": 0.0769, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.17360621690750122, + "rewards/margins": 0.27768850326538086, + "rewards/rejected": -0.4512947201728821, + "step": 2000 + }, + { + "epoch": 0.38, + "learning_rate": 3.8772424536302565e-06, + "logits/chosen": -1.5163154602050781, + "logits/rejected": -0.9248741865158081, + "logps/chosen": -573.7681884765625, + "logps/rejected": -1262.5050048828125, + "loss": 0.0581, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1913374364376068, + "rewards/margins": 0.3188498616218567, + "rewards/rejected": -0.5101873278617859, + "step": 2010 + }, + { + "epoch": 0.38, + "learning_rate": 3.863339684074432e-06, + "logits/chosen": -1.5402344465255737, + "logits/rejected": -0.9123631715774536, + "logps/chosen": -709.202392578125, + "logps/rejected": -1308.1771240234375, + "loss": 0.0794, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.24668677151203156, + "rewards/margins": 0.2680239975452423, + "rewards/rejected": -0.5147107839584351, + "step": 2020 + }, + { + "epoch": 0.39, + "learning_rate": 3.849376644878783e-06, + "logits/chosen": -1.5158917903900146, + "logits/rejected": -1.0071513652801514, + "logps/chosen": -599.4667358398438, + "logps/rejected": -1142.518798828125, + "loss": 0.1031, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.18736699223518372, + "rewards/margins": 0.24289944767951965, + "rewards/rejected": -0.43026643991470337, + "step": 2030 + }, + { + "epoch": 0.39, + "learning_rate": 3.835353953312322e-06, + "logits/chosen": -1.3921092748641968, + "logits/rejected": -1.161055326461792, + "logps/chosen": -493.7007751464844, + "logps/rejected": -1077.16455078125, + "loss": 0.0943, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.14471833407878876, + "rewards/margins": 0.25077611207962036, + "rewards/rejected": -0.3954944312572479, + "step": 2040 + }, + { + "epoch": 0.39, + "learning_rate": 3.821272229281139e-06, + "logits/chosen": -1.4992625713348389, + "logits/rejected": -1.0259852409362793, + "logps/chosen": -612.4508056640625, + "logps/rejected": -1262.510498046875, + "loss": 0.0754, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.15795397758483887, + "rewards/margins": 0.28065261244773865, + "rewards/rejected": -0.4386065602302551, + "step": 2050 + }, + { + "epoch": 0.39, + "learning_rate": 3.8071320953009906e-06, + "logits/chosen": -1.6317722797393799, + "logits/rejected": -1.0791391134262085, + "logps/chosen": -639.4310913085938, + "logps/rejected": -1332.7215576171875, + "loss": 0.0858, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.12871623039245605, + "rewards/margins": 0.3394959568977356, + "rewards/rejected": -0.46821218729019165, + "step": 2060 + }, + { + "epoch": 0.39, + "learning_rate": 3.792934176469782e-06, + "logits/chosen": -1.7077200412750244, + "logits/rejected": -1.005631685256958, + "logps/chosen": -544.0747680664062, + "logps/rejected": -1025.9820556640625, + "loss": 0.0778, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.11793072521686554, + "rewards/margins": 0.2298382818698883, + "rewards/rejected": -0.34776902198791504, + "step": 2070 + }, + { + "epoch": 0.4, + "learning_rate": 3.7786791004399353e-06, + "logits/chosen": -1.5485365390777588, + "logits/rejected": -1.1217293739318848, + "logps/chosen": -728.8970947265625, + "logps/rejected": -1328.073486328125, + "loss": 0.0871, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1932309865951538, + "rewards/margins": 0.2715621888637543, + "rewards/rejected": -0.46479320526123047, + "step": 2080 + }, + { + "epoch": 0.4, + "learning_rate": 3.764367497390642e-06, + "logits/chosen": -1.6724570989608765, + "logits/rejected": -0.9786966443061829, + "logps/chosen": -723.045166015625, + "logps/rejected": -1434.869873046875, + "loss": 0.0727, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2174513041973114, + "rewards/margins": 0.30513912439346313, + "rewards/rejected": -0.5225903987884521, + "step": 2090 + }, + { + "epoch": 0.4, + "learning_rate": 3.7500000000000005e-06, + "logits/chosen": -1.5985078811645508, + "logits/rejected": -1.1455590724945068, + "logps/chosen": -587.4654541015625, + "logps/rejected": -1450.05419921875, + "loss": 0.0538, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.151766836643219, + "rewards/margins": 0.36734539270401, + "rewards/rejected": -0.5191121697425842, + "step": 2100 + }, + { + "epoch": 0.4, + "learning_rate": 3.7355772434170523e-06, + "logits/chosen": -1.6532714366912842, + "logits/rejected": -0.9889874458312988, + "logps/chosen": -683.9132690429688, + "logps/rejected": -1231.6641845703125, + "loss": 0.0865, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.1688445806503296, + "rewards/margins": 0.2573260962963104, + "rewards/rejected": -0.42617067694664, + "step": 2110 + }, + { + "epoch": 0.4, + "learning_rate": 3.7210998652337016e-06, + "logits/chosen": -1.560417890548706, + "logits/rejected": -0.908432126045227, + "logps/chosen": -532.7721557617188, + "logps/rejected": -1283.0491943359375, + "loss": 0.0665, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.13333137333393097, + "rewards/margins": 0.32335206866264343, + "rewards/rejected": -0.4566834568977356, + "step": 2120 + }, + { + "epoch": 0.41, + "learning_rate": 3.7065685054565277e-06, + "logits/chosen": -1.462009310722351, + "logits/rejected": -1.0526163578033447, + "logps/chosen": -528.8088989257812, + "logps/rejected": -1030.3155517578125, + "loss": 0.0945, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10231202840805054, + "rewards/margins": 0.24501392245292664, + "rewards/rejected": -0.3473259210586548, + "step": 2130 + }, + { + "epoch": 0.41, + "learning_rate": 3.691983806478494e-06, + "logits/chosen": -1.4341996908187866, + "logits/rejected": -1.0435141324996948, + "logps/chosen": -599.2943115234375, + "logps/rejected": -1383.003173828125, + "loss": 0.0725, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.15288788080215454, + "rewards/margins": 0.33814504742622375, + "rewards/rejected": -0.4910329282283783, + "step": 2140 + }, + { + "epoch": 0.41, + "learning_rate": 3.677346413050551e-06, + "logits/chosen": -1.4380934238433838, + "logits/rejected": -0.8723732829093933, + "logps/chosen": -682.8527221679688, + "logps/rejected": -1212.371826171875, + "loss": 0.1084, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.17531666159629822, + "rewards/margins": 0.25657838582992554, + "rewards/rejected": -0.43189501762390137, + "step": 2150 + }, + { + "epoch": 0.41, + "learning_rate": 3.6626569722531268e-06, + "logits/chosen": -1.407714605331421, + "logits/rejected": -0.9261984825134277, + "logps/chosen": -701.3203125, + "logps/rejected": -1251.0504150390625, + "loss": 0.0886, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17605462670326233, + "rewards/margins": 0.24869613349437714, + "rewards/rejected": -0.4247507154941559, + "step": 2160 + }, + { + "epoch": 0.41, + "learning_rate": 3.6479161334675294e-06, + "logits/chosen": -1.385465145111084, + "logits/rejected": -1.017727017402649, + "logps/chosen": -544.0186767578125, + "logps/rejected": -1228.675537109375, + "loss": 0.0593, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.1482720822095871, + "rewards/margins": 0.2906687259674072, + "rewards/rejected": -0.4389408528804779, + "step": 2170 + }, + { + "epoch": 0.42, + "learning_rate": 3.6331245483472353e-06, + "logits/chosen": -1.6134536266326904, + "logits/rejected": -0.9634687304496765, + "logps/chosen": -553.2294311523438, + "logps/rejected": -1121.5455322265625, + "loss": 0.0862, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.14353647828102112, + "rewards/margins": 0.2658248543739319, + "rewards/rejected": -0.4093613028526306, + "step": 2180 + }, + { + "epoch": 0.42, + "learning_rate": 3.6182828707890816e-06, + "logits/chosen": -1.6181968450546265, + "logits/rejected": -1.0317254066467285, + "logps/chosen": -711.2445068359375, + "logps/rejected": -1320.489501953125, + "loss": 0.0524, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.20726224780082703, + "rewards/margins": 0.29739540815353394, + "rewards/rejected": -0.5046576857566833, + "step": 2190 + }, + { + "epoch": 0.42, + "learning_rate": 3.6033917569043604e-06, + "logits/chosen": -1.510839819908142, + "logits/rejected": -0.9395051002502441, + "logps/chosen": -561.166015625, + "logps/rejected": -1221.585205078125, + "loss": 0.0543, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.14164069294929504, + "rewards/margins": 0.31262117624282837, + "rewards/rejected": -0.454261839389801, + "step": 2200 + }, + { + "epoch": 0.42, + "learning_rate": 3.588451864989811e-06, + "logits/chosen": -1.5775179862976074, + "logits/rejected": -1.0709121227264404, + "logps/chosen": -516.7582397460938, + "logps/rejected": -1127.0125732421875, + "loss": 0.0696, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.1386266052722931, + "rewards/margins": 0.3021091818809509, + "rewards/rejected": -0.44073575735092163, + "step": 2210 + }, + { + "epoch": 0.42, + "learning_rate": 3.5734638554985234e-06, + "logits/chosen": -1.7261450290679932, + "logits/rejected": -0.9754320383071899, + "logps/chosen": -618.065673828125, + "logps/rejected": -1307.2408447265625, + "loss": 0.0597, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.13468700647354126, + "rewards/margins": 0.3289644122123718, + "rewards/rejected": -0.4636514186859131, + "step": 2220 + }, + { + "epoch": 0.42, + "learning_rate": 3.5584283910107343e-06, + "logits/chosen": -1.7003357410430908, + "logits/rejected": -1.0963810682296753, + "logps/chosen": -618.1256103515625, + "logps/rejected": -1277.525634765625, + "loss": 0.0583, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1276676058769226, + "rewards/margins": 0.3247441351413727, + "rewards/rejected": -0.4524117410182953, + "step": 2230 + }, + { + "epoch": 0.43, + "learning_rate": 3.543346136204545e-06, + "logits/chosen": -1.4668004512786865, + "logits/rejected": -0.9574554562568665, + "logps/chosen": -630.6279296875, + "logps/rejected": -1333.8809814453125, + "loss": 0.0762, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.18021656572818756, + "rewards/margins": 0.3060380518436432, + "rewards/rejected": -0.48625463247299194, + "step": 2240 + }, + { + "epoch": 0.43, + "learning_rate": 3.5282177578265295e-06, + "logits/chosen": -1.3924225568771362, + "logits/rejected": -0.8624798655509949, + "logps/chosen": -583.6441650390625, + "logps/rejected": -1162.06640625, + "loss": 0.0732, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.13777074217796326, + "rewards/margins": 0.300098717212677, + "rewards/rejected": -0.43786945939064026, + "step": 2250 + }, + { + "epoch": 0.43, + "learning_rate": 3.5130439246622635e-06, + "logits/chosen": -1.4143173694610596, + "logits/rejected": -0.8775388598442078, + "logps/chosen": -585.23583984375, + "logps/rejected": -1257.826904296875, + "loss": 0.0709, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.17448213696479797, + "rewards/margins": 0.32015711069107056, + "rewards/rejected": -0.49463921785354614, + "step": 2260 + }, + { + "epoch": 0.43, + "learning_rate": 3.497825307506758e-06, + "logits/chosen": -1.631155252456665, + "logits/rejected": -0.947667121887207, + "logps/chosen": -575.4785766601562, + "logps/rejected": -1216.88525390625, + "loss": 0.0858, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.19915080070495605, + "rewards/margins": 0.2901036739349365, + "rewards/rejected": -0.48925453424453735, + "step": 2270 + }, + { + "epoch": 0.43, + "learning_rate": 3.4825625791348093e-06, + "logits/chosen": -1.5258907079696655, + "logits/rejected": -0.9407553672790527, + "logps/chosen": -690.2452392578125, + "logps/rejected": -1400.36572265625, + "loss": 0.0567, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2197599709033966, + "rewards/margins": 0.328549325466156, + "rewards/rejected": -0.5483092665672302, + "step": 2280 + }, + { + "epoch": 0.44, + "learning_rate": 3.467256414271249e-06, + "logits/chosen": -1.3535867929458618, + "logits/rejected": -0.7650678753852844, + "logps/chosen": -732.3662719726562, + "logps/rejected": -1243.236328125, + "loss": 0.0992, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.21381525695323944, + "rewards/margins": 0.27646294236183167, + "rewards/rejected": -0.4902781844139099, + "step": 2290 + }, + { + "epoch": 0.44, + "learning_rate": 3.4519074895611245e-06, + "logits/chosen": -1.2721103429794312, + "logits/rejected": -0.9371077418327332, + "logps/chosen": -701.7020263671875, + "logps/rejected": -1331.9573974609375, + "loss": 0.0805, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.2324238270521164, + "rewards/margins": 0.27934929728507996, + "rewards/rejected": -0.5117732286453247, + "step": 2300 + }, + { + "epoch": 0.44, + "learning_rate": 3.436516483539781e-06, + "logits/chosen": -1.5690281391143799, + "logits/rejected": -1.041538953781128, + "logps/chosen": -624.0325927734375, + "logps/rejected": -1159.1929931640625, + "loss": 0.0922, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.20282158255577087, + "rewards/margins": 0.2440890371799469, + "rewards/rejected": -0.44691067934036255, + "step": 2310 + }, + { + "epoch": 0.44, + "learning_rate": 3.421084076602867e-06, + "logits/chosen": -1.6616109609603882, + "logits/rejected": -1.080195426940918, + "logps/chosen": -724.8908081054688, + "logps/rejected": -1440.558837890625, + "loss": 0.0344, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2283150851726532, + "rewards/margins": 0.3158523738384247, + "rewards/rejected": -0.5441675186157227, + "step": 2320 + }, + { + "epoch": 0.44, + "learning_rate": 3.405610950976257e-06, + "logits/chosen": -1.6437885761260986, + "logits/rejected": -0.6748986840248108, + "logps/chosen": -597.8292846679688, + "logps/rejected": -1239.0748291015625, + "loss": 0.0444, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.14971794188022614, + "rewards/margins": 0.32578349113464355, + "rewards/rejected": -0.4755013883113861, + "step": 2330 + }, + { + "epoch": 0.45, + "learning_rate": 3.3900977906858923e-06, + "logits/chosen": -1.4760338068008423, + "logits/rejected": -1.0113236904144287, + "logps/chosen": -593.802734375, + "logps/rejected": -1221.1602783203125, + "loss": 0.0987, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1533535122871399, + "rewards/margins": 0.2769462466239929, + "rewards/rejected": -0.4302998185157776, + "step": 2340 + }, + { + "epoch": 0.45, + "learning_rate": 3.3745452815275375e-06, + "logits/chosen": -1.625119924545288, + "logits/rejected": -0.9021151661872864, + "logps/chosen": -557.1563720703125, + "logps/rejected": -1177.1480712890625, + "loss": 0.0825, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.1604781150817871, + "rewards/margins": 0.3083071708679199, + "rewards/rejected": -0.46878522634506226, + "step": 2350 + }, + { + "epoch": 0.45, + "learning_rate": 3.3589541110364678e-06, + "logits/chosen": -1.6174547672271729, + "logits/rejected": -1.021515130996704, + "logps/chosen": -600.9921875, + "logps/rejected": -1149.3212890625, + "loss": 0.0879, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1592303216457367, + "rewards/margins": 0.2715124487876892, + "rewards/rejected": -0.4307428002357483, + "step": 2360 + }, + { + "epoch": 0.45, + "learning_rate": 3.3433249684570757e-06, + "logits/chosen": -1.6038291454315186, + "logits/rejected": -0.984597384929657, + "logps/chosen": -691.2508544921875, + "logps/rejected": -1356.6763916015625, + "loss": 0.0788, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.1866895705461502, + "rewards/margins": 0.31433457136154175, + "rewards/rejected": -0.5010241270065308, + "step": 2370 + }, + { + "epoch": 0.45, + "learning_rate": 3.3276585447123957e-06, + "logits/chosen": -1.625759482383728, + "logits/rejected": -0.9024505615234375, + "logps/chosen": -655.2415771484375, + "logps/rejected": -1165.357666015625, + "loss": 0.079, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.14526958763599396, + "rewards/margins": 0.23506641387939453, + "rewards/rejected": -0.3803360164165497, + "step": 2380 + }, + { + "epoch": 0.46, + "learning_rate": 3.3119555323735664e-06, + "logits/chosen": -1.5876622200012207, + "logits/rejected": -0.9881450533866882, + "logps/chosen": -586.2982177734375, + "logps/rejected": -1114.760498046875, + "loss": 0.089, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14424452185630798, + "rewards/margins": 0.24635834991931915, + "rewards/rejected": -0.3906029164791107, + "step": 2390 + }, + { + "epoch": 0.46, + "learning_rate": 3.2962166256292116e-06, + "logits/chosen": -1.7776038646697998, + "logits/rejected": -0.9887340664863586, + "logps/chosen": -622.7711791992188, + "logps/rejected": -1265.460693359375, + "loss": 0.0615, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.16243186593055725, + "rewards/margins": 0.31626278162002563, + "rewards/rejected": -0.4786946177482605, + "step": 2400 + }, + { + "epoch": 0.46, + "learning_rate": 3.2804425202547494e-06, + "logits/chosen": -1.6128498315811157, + "logits/rejected": -1.0431678295135498, + "logps/chosen": -668.9735107421875, + "logps/rejected": -1388.3677978515625, + "loss": 0.0744, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2042228728532791, + "rewards/margins": 0.31508609652519226, + "rewards/rejected": -0.5193089246749878, + "step": 2410 + }, + { + "epoch": 0.46, + "learning_rate": 3.2646339135816386e-06, + "logits/chosen": -1.6792058944702148, + "logits/rejected": -1.1176880598068237, + "logps/chosen": -575.1004638671875, + "logps/rejected": -1309.0078125, + "loss": 0.0722, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1628337800502777, + "rewards/margins": 0.32463642954826355, + "rewards/rejected": -0.48747020959854126, + "step": 2420 + }, + { + "epoch": 0.46, + "learning_rate": 3.2487915044665485e-06, + "logits/chosen": -1.3517749309539795, + "logits/rejected": -0.8303594589233398, + "logps/chosen": -623.1898193359375, + "logps/rejected": -1244.054931640625, + "loss": 0.1096, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.20480790734291077, + "rewards/margins": 0.27906081080436707, + "rewards/rejected": -0.48386868834495544, + "step": 2430 + }, + { + "epoch": 0.46, + "learning_rate": 3.2329159932604638e-06, + "logits/chosen": -1.2916462421417236, + "logits/rejected": -0.7104039192199707, + "logps/chosen": -619.9585571289062, + "logps/rejected": -1238.257568359375, + "loss": 0.0759, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.17360931634902954, + "rewards/margins": 0.2984062731266022, + "rewards/rejected": -0.4720155596733093, + "step": 2440 + }, + { + "epoch": 0.47, + "learning_rate": 3.217008081777726e-06, + "logits/chosen": -1.5925421714782715, + "logits/rejected": -1.388627290725708, + "logps/chosen": -634.1315307617188, + "logps/rejected": -1347.8963623046875, + "loss": 0.0678, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.21671870350837708, + "rewards/margins": 0.3036806285381317, + "rewards/rejected": -0.5203993916511536, + "step": 2450 + }, + { + "epoch": 0.47, + "learning_rate": 3.201068473265007e-06, + "logits/chosen": -1.611342430114746, + "logits/rejected": -1.019357442855835, + "logps/chosen": -568.1485595703125, + "logps/rejected": -1200.0748291015625, + "loss": 0.0905, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14752653241157532, + "rewards/margins": 0.2930060923099518, + "rewards/rejected": -0.4405326843261719, + "step": 2460 + }, + { + "epoch": 0.47, + "learning_rate": 3.1850978723702213e-06, + "logits/chosen": -1.7960550785064697, + "logits/rejected": -0.867364764213562, + "logps/chosen": -668.3005981445312, + "logps/rejected": -1216.97705078125, + "loss": 0.0916, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.174250066280365, + "rewards/margins": 0.27992844581604004, + "rewards/rejected": -0.45417851209640503, + "step": 2470 + }, + { + "epoch": 0.47, + "learning_rate": 3.1690969851113724e-06, + "logits/chosen": -1.581278681755066, + "logits/rejected": -1.1784000396728516, + "logps/chosen": -590.9406127929688, + "logps/rejected": -1250.147216796875, + "loss": 0.0727, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1811566799879074, + "rewards/margins": 0.29458481073379517, + "rewards/rejected": -0.47574153542518616, + "step": 2480 + }, + { + "epoch": 0.47, + "learning_rate": 3.1530665188453463e-06, + "logits/chosen": -1.3773750066757202, + "logits/rejected": -0.6731222867965698, + "logps/chosen": -648.3175659179688, + "logps/rejected": -1236.1195068359375, + "loss": 0.0595, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.20099523663520813, + "rewards/margins": 0.27121472358703613, + "rewards/rejected": -0.4722098708152771, + "step": 2490 + }, + { + "epoch": 0.48, + "learning_rate": 3.137007182236637e-06, + "logits/chosen": -1.514936089515686, + "logits/rejected": -0.6673987507820129, + "logps/chosen": -754.236083984375, + "logps/rejected": -1341.1190185546875, + "loss": 0.0783, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.196518212556839, + "rewards/margins": 0.2775440514087677, + "rewards/rejected": -0.4740622937679291, + "step": 2500 + }, + { + "epoch": 0.48, + "learning_rate": 3.1209196852260204e-06, + "logits/chosen": -1.3017624616622925, + "logits/rejected": -1.00785231590271, + "logps/chosen": -599.2673950195312, + "logps/rejected": -1185.9400634765625, + "loss": 0.0936, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.17537134885787964, + "rewards/margins": 0.24747446179389954, + "rewards/rejected": -0.4228457808494568, + "step": 2510 + }, + { + "epoch": 0.48, + "learning_rate": 3.1048047389991693e-06, + "logits/chosen": -1.690582036972046, + "logits/rejected": -1.104832410812378, + "logps/chosen": -619.1864624023438, + "logps/rejected": -1118.77197265625, + "loss": 0.0764, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.13324618339538574, + "rewards/margins": 0.25045424699783325, + "rewards/rejected": -0.383700430393219, + "step": 2520 + }, + { + "epoch": 0.48, + "learning_rate": 3.0886630559552144e-06, + "logits/chosen": -1.3345118761062622, + "logits/rejected": -0.9842830896377563, + "logps/chosen": -694.3142700195312, + "logps/rejected": -1348.347412109375, + "loss": 0.0762, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.14755675196647644, + "rewards/margins": 0.30060485005378723, + "rewards/rejected": -0.44816160202026367, + "step": 2530 + }, + { + "epoch": 0.48, + "learning_rate": 3.072495349675249e-06, + "logits/chosen": -1.6009747982025146, + "logits/rejected": -0.8453266024589539, + "logps/chosen": -539.679931640625, + "logps/rejected": -1170.426025390625, + "loss": 0.071, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.1122439056634903, + "rewards/margins": 0.2621922492980957, + "rewards/rejected": -0.3744361996650696, + "step": 2540 + }, + { + "epoch": 0.49, + "learning_rate": 3.056302334890786e-06, + "logits/chosen": -1.6523168087005615, + "logits/rejected": -1.110414743423462, + "logps/chosen": -545.8953247070312, + "logps/rejected": -1274.470458984375, + "loss": 0.0555, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.1152963787317276, + "rewards/margins": 0.3395312428474426, + "rewards/rejected": -0.4548276364803314, + "step": 2550 + }, + { + "epoch": 0.49, + "learning_rate": 3.04008472745216e-06, + "logits/chosen": -1.7794824838638306, + "logits/rejected": -0.8890671730041504, + "logps/chosen": -637.3228759765625, + "logps/rejected": -1193.5184326171875, + "loss": 0.0773, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.16369478404521942, + "rewards/margins": 0.2698102593421936, + "rewards/rejected": -0.43350496888160706, + "step": 2560 + }, + { + "epoch": 0.49, + "learning_rate": 3.0238432442968803e-06, + "logits/chosen": -1.5971051454544067, + "logits/rejected": -0.9152080416679382, + "logps/chosen": -570.8718872070312, + "logps/rejected": -1310.4375, + "loss": 0.053, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13165855407714844, + "rewards/margins": 0.33412402868270874, + "rewards/rejected": -0.46578264236450195, + "step": 2570 + }, + { + "epoch": 0.49, + "learning_rate": 3.0075786034179407e-06, + "logits/chosen": -1.2595056295394897, + "logits/rejected": -0.8606653213500977, + "logps/chosen": -552.0274658203125, + "logps/rejected": -1287.727783203125, + "loss": 0.0654, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1181732639670372, + "rewards/margins": 0.3271896541118622, + "rewards/rejected": -0.4453628957271576, + "step": 2580 + }, + { + "epoch": 0.49, + "learning_rate": 2.9912915238320755e-06, + "logits/chosen": -1.5348694324493408, + "logits/rejected": -0.9599231481552124, + "logps/chosen": -560.0164184570312, + "logps/rejected": -1273.4046630859375, + "loss": 0.0714, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1421751081943512, + "rewards/margins": 0.29482603073120117, + "rewards/rejected": -0.43700116872787476, + "step": 2590 + }, + { + "epoch": 0.5, + "learning_rate": 2.974982725547976e-06, + "logits/chosen": -1.4486770629882812, + "logits/rejected": -1.0592507123947144, + "logps/chosen": -680.0877685546875, + "logps/rejected": -1446.515869140625, + "loss": 0.0431, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.17707717418670654, + "rewards/margins": 0.3384220600128174, + "rewards/rejected": -0.5154992341995239, + "step": 2600 + }, + { + "epoch": 0.5, + "learning_rate": 2.958652929534456e-06, + "logits/chosen": -1.2989368438720703, + "logits/rejected": -1.0571434497833252, + "logps/chosen": -603.8392333984375, + "logps/rejected": -1268.223876953125, + "loss": 0.1061, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.18222357332706451, + "rewards/margins": 0.2554742693901062, + "rewards/rejected": -0.4376978278160095, + "step": 2610 + }, + { + "epoch": 0.5, + "learning_rate": 2.9423028576885894e-06, + "logits/chosen": -1.5412323474884033, + "logits/rejected": -1.0013290643692017, + "logps/chosen": -640.44921875, + "logps/rejected": -1294.619873046875, + "loss": 0.0688, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.14701518416404724, + "rewards/margins": 0.29219189286231995, + "rewards/rejected": -0.4392070770263672, + "step": 2620 + }, + { + "epoch": 0.5, + "learning_rate": 2.9259332328037852e-06, + "logits/chosen": -1.396354079246521, + "logits/rejected": -0.7972344756126404, + "logps/chosen": -504.8910217285156, + "logps/rejected": -1190.3065185546875, + "loss": 0.0614, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1175163984298706, + "rewards/margins": 0.2946506142616272, + "rewards/rejected": -0.4121670722961426, + "step": 2630 + }, + { + "epoch": 0.5, + "learning_rate": 2.9095447785378446e-06, + "logits/chosen": -1.371985673904419, + "logits/rejected": -0.9223454594612122, + "logps/chosen": -605.7084350585938, + "logps/rejected": -1249.2154541015625, + "loss": 0.0785, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.13958588242530823, + "rewards/margins": 0.3104739189147949, + "rewards/rejected": -0.45005980134010315, + "step": 2640 + }, + { + "epoch": 0.5, + "learning_rate": 2.893138219380964e-06, + "logits/chosen": -1.2879749536514282, + "logits/rejected": -0.7854377627372742, + "logps/chosen": -605.78271484375, + "logps/rejected": -1397.0079345703125, + "loss": 0.0434, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.125517338514328, + "rewards/margins": 0.345243901014328, + "rewards/rejected": -0.4707612097263336, + "step": 2650 + }, + { + "epoch": 0.51, + "learning_rate": 2.876714280623708e-06, + "logits/chosen": -1.526444673538208, + "logits/rejected": -0.7589839100837708, + "logps/chosen": -647.5239868164062, + "logps/rejected": -1338.138427734375, + "loss": 0.0568, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.13042430579662323, + "rewards/margins": 0.32712113857269287, + "rewards/rejected": -0.45754551887512207, + "step": 2660 + }, + { + "epoch": 0.51, + "learning_rate": 2.8602736883249504e-06, + "logits/chosen": -1.6594903469085693, + "logits/rejected": -0.9879050254821777, + "logps/chosen": -514.2203979492188, + "logps/rejected": -1181.108154296875, + "loss": 0.0527, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0958632081747055, + "rewards/margins": 0.3106859624385834, + "rewards/rejected": -0.4065491557121277, + "step": 2670 + }, + { + "epoch": 0.51, + "learning_rate": 2.843817169279772e-06, + "logits/chosen": -1.4798604249954224, + "logits/rejected": -0.861343264579773, + "logps/chosen": -565.4484252929688, + "logps/rejected": -1215.5482177734375, + "loss": 0.0675, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1356905698776245, + "rewards/margins": 0.29588332772254944, + "rewards/rejected": -0.43157386779785156, + "step": 2680 + }, + { + "epoch": 0.51, + "learning_rate": 2.8273454509873333e-06, + "logits/chosen": -1.714463472366333, + "logits/rejected": -0.9027876853942871, + "logps/chosen": -557.419189453125, + "logps/rejected": -1265.908935546875, + "loss": 0.0516, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10656454414129257, + "rewards/margins": 0.33830881118774414, + "rewards/rejected": -0.4448733925819397, + "step": 2690 + }, + { + "epoch": 0.51, + "learning_rate": 2.8108592616187135e-06, + "logits/chosen": -1.7163057327270508, + "logits/rejected": -1.0684901475906372, + "logps/chosen": -590.5923461914062, + "logps/rejected": -1185.128173828125, + "loss": 0.0723, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.13913396000862122, + "rewards/margins": 0.26712125539779663, + "rewards/rejected": -0.40625524520874023, + "step": 2700 + }, + { + "epoch": 0.52, + "learning_rate": 2.7943593299847186e-06, + "logits/chosen": -1.6501725912094116, + "logits/rejected": -0.7960497736930847, + "logps/chosen": -609.9940185546875, + "logps/rejected": -1179.6832275390625, + "loss": 0.0629, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15014216303825378, + "rewards/margins": 0.286409467458725, + "rewards/rejected": -0.43655166029930115, + "step": 2710 + }, + { + "epoch": 0.52, + "learning_rate": 2.7778463855036656e-06, + "logits/chosen": -1.40164315700531, + "logits/rejected": -0.8055755496025085, + "logps/chosen": -687.6304931640625, + "logps/rejected": -1379.4273681640625, + "loss": 0.0578, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.16296520829200745, + "rewards/margins": 0.3322266936302185, + "rewards/rejected": -0.4951918125152588, + "step": 2720 + }, + { + "epoch": 0.52, + "learning_rate": 2.761321158169134e-06, + "logits/chosen": -1.5054407119750977, + "logits/rejected": -0.9149681329727173, + "logps/chosen": -664.0867309570312, + "logps/rejected": -1220.485107421875, + "loss": 0.086, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.18236112594604492, + "rewards/margins": 0.2533378303050995, + "rewards/rejected": -0.435698926448822, + "step": 2730 + }, + { + "epoch": 0.52, + "learning_rate": 2.7447843785176958e-06, + "logits/chosen": -1.680153489112854, + "logits/rejected": -1.1227161884307861, + "logps/chosen": -620.3820190429688, + "logps/rejected": -1187.011474609375, + "loss": 0.086, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.13061869144439697, + "rewards/margins": 0.29502156376838684, + "rewards/rejected": -0.4256402850151062, + "step": 2740 + }, + { + "epoch": 0.52, + "learning_rate": 2.728236777596621e-06, + "logits/chosen": -1.6802221536636353, + "logits/rejected": -0.9513761401176453, + "logps/chosen": -532.1759643554688, + "logps/rejected": -1148.6627197265625, + "loss": 0.0723, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.10019969940185547, + "rewards/margins": 0.30757206678390503, + "rewards/rejected": -0.4077717661857605, + "step": 2750 + }, + { + "epoch": 0.53, + "learning_rate": 2.7116790869315583e-06, + "logits/chosen": -1.591932773590088, + "logits/rejected": -0.9616080522537231, + "logps/chosen": -556.04296875, + "logps/rejected": -1179.85791015625, + "loss": 0.0578, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.13644689321517944, + "rewards/margins": 0.290695458650589, + "rewards/rejected": -0.42714232206344604, + "step": 2760 + }, + { + "epoch": 0.53, + "learning_rate": 2.695112038494198e-06, + "logits/chosen": -1.633966088294983, + "logits/rejected": -0.9920031428337097, + "logps/chosen": -703.5379028320312, + "logps/rejected": -1436.71533203125, + "loss": 0.0612, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.17566515505313873, + "rewards/margins": 0.3405109643936157, + "rewards/rejected": -0.516176164150238, + "step": 2770 + }, + { + "epoch": 0.53, + "learning_rate": 2.6785363646699125e-06, + "logits/chosen": -1.5663446187973022, + "logits/rejected": -0.9617182612419128, + "logps/chosen": -687.4505615234375, + "logps/rejected": -1352.730224609375, + "loss": 0.0586, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1697978675365448, + "rewards/margins": 0.31239965558052063, + "rewards/rejected": -0.48219752311706543, + "step": 2780 + }, + { + "epoch": 0.53, + "learning_rate": 2.6619527982253796e-06, + "logits/chosen": -1.663469910621643, + "logits/rejected": -1.2072179317474365, + "logps/chosen": -623.8106689453125, + "logps/rejected": -1280.404052734375, + "loss": 0.0712, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14441026747226715, + "rewards/margins": 0.30307531356811523, + "rewards/rejected": -0.4474855959415436, + "step": 2790 + }, + { + "epoch": 0.53, + "learning_rate": 2.6453620722761897e-06, + "logits/chosen": -1.5637850761413574, + "logits/rejected": -1.064086675643921, + "logps/chosen": -672.9819946289062, + "logps/rejected": -1308.570556640625, + "loss": 0.1009, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.10943061113357544, + "rewards/margins": 0.3063245117664337, + "rewards/rejected": -0.41575512290000916, + "step": 2800 + }, + { + "epoch": 0.54, + "learning_rate": 2.628764920254435e-06, + "logits/chosen": -1.5698572397232056, + "logits/rejected": -1.0133156776428223, + "logps/chosen": -510.04412841796875, + "logps/rejected": -1206.696533203125, + "loss": 0.0806, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.04279591515660286, + "rewards/margins": 0.33799928426742554, + "rewards/rejected": -0.3807952404022217, + "step": 2810 + }, + { + "epoch": 0.54, + "learning_rate": 2.6121620758762877e-06, + "logits/chosen": -1.9097235202789307, + "logits/rejected": -1.2351771593093872, + "logps/chosen": -511.05865478515625, + "logps/rejected": -1213.244873046875, + "loss": 0.0559, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.04127006232738495, + "rewards/margins": 0.349868506193161, + "rewards/rejected": -0.39113855361938477, + "step": 2820 + }, + { + "epoch": 0.54, + "learning_rate": 2.595554273109564e-06, + "logits/chosen": -1.3623178005218506, + "logits/rejected": -0.9337053298950195, + "logps/chosen": -494.78363037109375, + "logps/rejected": -1105.956787109375, + "loss": 0.0832, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.06648660451173782, + "rewards/margins": 0.2886095941066742, + "rewards/rejected": -0.3550961911678314, + "step": 2830 + }, + { + "epoch": 0.54, + "learning_rate": 2.5789422461412776e-06, + "logits/chosen": -1.5411412715911865, + "logits/rejected": -1.069968581199646, + "logps/chosen": -604.7297973632812, + "logps/rejected": -1145.9554443359375, + "loss": 0.0999, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14811378717422485, + "rewards/margins": 0.24691152572631836, + "rewards/rejected": -0.3950252830982208, + "step": 2840 + }, + { + "epoch": 0.54, + "learning_rate": 2.5623267293451827e-06, + "logits/chosen": -1.8171898126602173, + "logits/rejected": -1.014732003211975, + "logps/chosen": -595.6168212890625, + "logps/rejected": -1368.0377197265625, + "loss": 0.0434, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.11283586919307709, + "rewards/margins": 0.3873719573020935, + "rewards/rejected": -0.5002078413963318, + "step": 2850 + }, + { + "epoch": 0.54, + "learning_rate": 2.5457084572493094e-06, + "logits/chosen": -1.6847915649414062, + "logits/rejected": -0.8486756086349487, + "logps/chosen": -611.9779663085938, + "logps/rejected": -1245.8311767578125, + "loss": 0.067, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.11453308165073395, + "rewards/margins": 0.3070717453956604, + "rewards/rejected": -0.42160478234291077, + "step": 2860 + }, + { + "epoch": 0.55, + "learning_rate": 2.5290881645034932e-06, + "logits/chosen": -1.5983669757843018, + "logits/rejected": -1.0357117652893066, + "logps/chosen": -570.3855590820312, + "logps/rejected": -1264.300537109375, + "loss": 0.0579, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11936721950769424, + "rewards/margins": 0.3358164429664612, + "rewards/rejected": -0.45518365502357483, + "step": 2870 + }, + { + "epoch": 0.55, + "learning_rate": 2.5124665858468956e-06, + "logits/chosen": -1.475404977798462, + "logits/rejected": -0.9934619069099426, + "logps/chosen": -562.2199096679688, + "logps/rejected": -1392.272216796875, + "loss": 0.0574, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.13683505356311798, + "rewards/margins": 0.36336854100227356, + "rewards/rejected": -0.5002034902572632, + "step": 2880 + }, + { + "epoch": 0.55, + "learning_rate": 2.4958444560755268e-06, + "logits/chosen": -1.543084979057312, + "logits/rejected": -0.8719257116317749, + "logps/chosen": -697.3746948242188, + "logps/rejected": -1420.4222412109375, + "loss": 0.0386, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.19695594906806946, + "rewards/margins": 0.35786157846450806, + "rewards/rejected": -0.5548174977302551, + "step": 2890 + }, + { + "epoch": 0.55, + "learning_rate": 2.479222510009758e-06, + "logits/chosen": -1.5835835933685303, + "logits/rejected": -0.9517591595649719, + "logps/chosen": -674.9082641601562, + "logps/rejected": -1285.297119140625, + "loss": 0.0815, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1738702654838562, + "rewards/margins": 0.2946757674217224, + "rewards/rejected": -0.4685460031032562, + "step": 2900 + }, + { + "epoch": 0.55, + "learning_rate": 2.4626014824618418e-06, + "logits/chosen": -1.5905416011810303, + "logits/rejected": -0.8818572163581848, + "logps/chosen": -649.43505859375, + "logps/rejected": -1258.1546630859375, + "loss": 0.0676, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.13180610537528992, + "rewards/margins": 0.34726306796073914, + "rewards/rejected": -0.47906917333602905, + "step": 2910 + }, + { + "epoch": 0.56, + "learning_rate": 2.445982108203422e-06, + "logits/chosen": -1.4787397384643555, + "logits/rejected": -0.7833604216575623, + "logps/chosen": -612.2667846679688, + "logps/rejected": -1210.948974609375, + "loss": 0.0592, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1284160166978836, + "rewards/margins": 0.31005439162254333, + "rewards/rejected": -0.43847042322158813, + "step": 2920 + }, + { + "epoch": 0.56, + "learning_rate": 2.4293651219330614e-06, + "logits/chosen": -1.5488474369049072, + "logits/rejected": -0.9995074272155762, + "logps/chosen": -633.7059936523438, + "logps/rejected": -1303.328857421875, + "loss": 0.0551, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.13731206953525543, + "rewards/margins": 0.3035878539085388, + "rewards/rejected": -0.44089993834495544, + "step": 2930 + }, + { + "epoch": 0.56, + "learning_rate": 2.4127512582437486e-06, + "logits/chosen": -1.6039226055145264, + "logits/rejected": -0.9585688710212708, + "logps/chosen": -592.0299072265625, + "logps/rejected": -1321.1495361328125, + "loss": 0.0554, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11685065180063248, + "rewards/margins": 0.3511469066143036, + "rewards/rejected": -0.46799755096435547, + "step": 2940 + }, + { + "epoch": 0.56, + "learning_rate": 2.3961412515904337e-06, + "logits/chosen": -1.3695369958877563, + "logits/rejected": -0.9800816774368286, + "logps/chosen": -594.6244506835938, + "logps/rejected": -1240.9417724609375, + "loss": 0.0855, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.14591816067695618, + "rewards/margins": 0.3062663674354553, + "rewards/rejected": -0.4521844983100891, + "step": 2950 + }, + { + "epoch": 0.56, + "learning_rate": 2.3795358362575618e-06, + "logits/chosen": -1.5104422569274902, + "logits/rejected": -1.1466343402862549, + "logps/chosen": -510.5021057128906, + "logps/rejected": -1294.258544921875, + "loss": 0.0478, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1302535980939865, + "rewards/margins": 0.3396373689174652, + "rewards/rejected": -0.4698910117149353, + "step": 2960 + }, + { + "epoch": 0.57, + "learning_rate": 2.3629357463266e-06, + "logits/chosen": -1.6058807373046875, + "logits/rejected": -1.0046648979187012, + "logps/chosen": -508.4608459472656, + "logps/rejected": -1332.2764892578125, + "loss": 0.0465, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.06275545060634613, + "rewards/margins": 0.3741517961025238, + "rewards/rejected": -0.43690723180770874, + "step": 2970 + }, + { + "epoch": 0.57, + "learning_rate": 2.346341715643601e-06, + "logits/chosen": -1.4893066883087158, + "logits/rejected": -0.8853636980056763, + "logps/chosen": -449.1748046875, + "logps/rejected": -1121.550537109375, + "loss": 0.0551, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.063736692070961, + "rewards/margins": 0.3206193447113037, + "rewards/rejected": -0.38435599207878113, + "step": 2980 + }, + { + "epoch": 0.57, + "learning_rate": 2.32975447778675e-06, + "logits/chosen": -1.7218694686889648, + "logits/rejected": -0.8579978942871094, + "logps/chosen": -607.2933349609375, + "logps/rejected": -1323.5452880859375, + "loss": 0.0698, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12219960987567902, + "rewards/margins": 0.35332444310188293, + "rewards/rejected": -0.47552403807640076, + "step": 2990 + }, + { + "epoch": 0.57, + "learning_rate": 2.3131747660339396e-06, + "logits/chosen": -1.630824089050293, + "logits/rejected": -1.1554635763168335, + "logps/chosen": -639.3021240234375, + "logps/rejected": -1315.35498046875, + "loss": 0.087, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.14220556616783142, + "rewards/margins": 0.30729326605796814, + "rewards/rejected": -0.44949883222579956, + "step": 3000 + }, + { + "epoch": 0.57, + "learning_rate": 2.296603313330355e-06, + "logits/chosen": -1.4276154041290283, + "logits/rejected": -0.9441580772399902, + "logps/chosen": -727.0963134765625, + "logps/rejected": -1390.979736328125, + "loss": 0.1062, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.15815845131874084, + "rewards/margins": 0.3167917728424072, + "rewards/rejected": -0.47495022416114807, + "step": 3010 + }, + { + "epoch": 0.58, + "learning_rate": 2.280040852256068e-06, + "logits/chosen": -1.4897199869155884, + "logits/rejected": -1.0120981931686401, + "logps/chosen": -607.09521484375, + "logps/rejected": -1325.50390625, + "loss": 0.0785, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11801674216985703, + "rewards/margins": 0.3201891779899597, + "rewards/rejected": -0.4382059574127197, + "step": 3020 + }, + { + "epoch": 0.58, + "learning_rate": 2.2634881149936576e-06, + "logits/chosen": -1.4906480312347412, + "logits/rejected": -1.0610196590423584, + "logps/chosen": -488.85565185546875, + "logps/rejected": -1112.202392578125, + "loss": 0.0616, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.0860569030046463, + "rewards/margins": 0.2713943421840668, + "rewards/rejected": -0.35745126008987427, + "step": 3030 + }, + { + "epoch": 0.58, + "learning_rate": 2.246945833295836e-06, + "logits/chosen": -1.4534862041473389, + "logits/rejected": -0.9673159718513489, + "logps/chosen": -605.3302001953125, + "logps/rejected": -1201.044189453125, + "loss": 0.076, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.133608877658844, + "rewards/margins": 0.2811334431171417, + "rewards/rejected": -0.41474229097366333, + "step": 3040 + }, + { + "epoch": 0.58, + "learning_rate": 2.230414738453104e-06, + "logits/chosen": -1.767216682434082, + "logits/rejected": -0.8330327868461609, + "logps/chosen": -564.5643310546875, + "logps/rejected": -1205.147705078125, + "loss": 0.054, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.09630884975194931, + "rewards/margins": 0.3197113871574402, + "rewards/rejected": -0.4160202443599701, + "step": 3050 + }, + { + "epoch": 0.58, + "learning_rate": 2.2138955612614206e-06, + "logits/chosen": -1.421764612197876, + "logits/rejected": -1.0521514415740967, + "logps/chosen": -628.13232421875, + "logps/rejected": -1224.50830078125, + "loss": 0.0946, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.14950081706047058, + "rewards/margins": 0.2471323311328888, + "rewards/rejected": -0.396633118391037, + "step": 3060 + }, + { + "epoch": 0.58, + "learning_rate": 2.1973890319898965e-06, + "logits/chosen": -1.6031911373138428, + "logits/rejected": -1.0214576721191406, + "logps/chosen": -562.112060546875, + "logps/rejected": -1285.865966796875, + "loss": 0.0528, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.08155658096075058, + "rewards/margins": 0.31444963812828064, + "rewards/rejected": -0.3960062563419342, + "step": 3070 + }, + { + "epoch": 0.59, + "learning_rate": 2.1808958803485134e-06, + "logits/chosen": -1.4915971755981445, + "logits/rejected": -1.0089516639709473, + "logps/chosen": -610.0130004882812, + "logps/rejected": -1197.3531494140625, + "loss": 0.0601, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.1019025444984436, + "rewards/margins": 0.26567164063453674, + "rewards/rejected": -0.36757418513298035, + "step": 3080 + }, + { + "epoch": 0.59, + "learning_rate": 2.1644168354558623e-06, + "logits/chosen": -1.6156599521636963, + "logits/rejected": -1.1080366373062134, + "logps/chosen": -492.7898864746094, + "logps/rejected": -1027.3740234375, + "loss": 0.0683, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.05810853838920593, + "rewards/margins": 0.2624923884868622, + "rewards/rejected": -0.3206009268760681, + "step": 3090 + }, + { + "epoch": 0.59, + "learning_rate": 2.1479526258069086e-06, + "logits/chosen": -1.5177949666976929, + "logits/rejected": -0.9924715757369995, + "logps/chosen": -546.3245849609375, + "logps/rejected": -1170.1224365234375, + "loss": 0.0951, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.08852513134479523, + "rewards/margins": 0.27009642124176025, + "rewards/rejected": -0.3586215376853943, + "step": 3100 + }, + { + "epoch": 0.59, + "learning_rate": 2.1315039792407975e-06, + "logits/chosen": -1.3530235290527344, + "logits/rejected": -0.7246929407119751, + "logps/chosen": -548.1713256835938, + "logps/rejected": -1333.8492431640625, + "loss": 0.0413, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.09849376976490021, + "rewards/margins": 0.3531506657600403, + "rewards/rejected": -0.4516444206237793, + "step": 3110 + }, + { + "epoch": 0.59, + "learning_rate": 2.115071622908666e-06, + "logits/chosen": -1.4942448139190674, + "logits/rejected": -0.7404344081878662, + "logps/chosen": -500.0982360839844, + "logps/rejected": -1106.1728515625, + "loss": 0.0538, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.06125213950872421, + "rewards/margins": 0.31473299860954285, + "rewards/rejected": -0.37598511576652527, + "step": 3120 + }, + { + "epoch": 0.6, + "learning_rate": 2.0986562832415063e-06, + "logits/chosen": -1.6442813873291016, + "logits/rejected": -1.1128791570663452, + "logps/chosen": -557.3236083984375, + "logps/rejected": -1182.487060546875, + "loss": 0.0558, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.04658445343375206, + "rewards/margins": 0.33271127939224243, + "rewards/rejected": -0.3792957663536072, + "step": 3130 + }, + { + "epoch": 0.6, + "learning_rate": 2.082258685918047e-06, + "logits/chosen": -1.6417179107666016, + "logits/rejected": -1.0537028312683105, + "logps/chosen": -473.1177673339844, + "logps/rejected": -1105.520263671875, + "loss": 0.0614, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.05988591909408569, + "rewards/margins": 0.2984519600868225, + "rewards/rejected": -0.358337938785553, + "step": 3140 + }, + { + "epoch": 0.6, + "learning_rate": 2.0658795558326745e-06, + "logits/chosen": -1.6194617748260498, + "logits/rejected": -1.0565998554229736, + "logps/chosen": -530.3972778320312, + "logps/rejected": -1221.812744140625, + "loss": 0.0723, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.07586511969566345, + "rewards/margins": 0.324541836977005, + "rewards/rejected": -0.40040698647499084, + "step": 3150 + }, + { + "epoch": 0.6, + "learning_rate": 2.049519617063389e-06, + "logits/chosen": -1.5704630613327026, + "logits/rejected": -0.9416986703872681, + "logps/chosen": -497.73974609375, + "logps/rejected": -1204.732666015625, + "loss": 0.0386, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.07720668613910675, + "rewards/margins": 0.3270443081855774, + "rewards/rejected": -0.40425100922584534, + "step": 3160 + }, + { + "epoch": 0.6, + "learning_rate": 2.033179592839792e-06, + "logits/chosen": -1.8685951232910156, + "logits/rejected": -1.069949984550476, + "logps/chosen": -582.2603759765625, + "logps/rejected": -1132.173095703125, + "loss": 0.0761, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.08046362549066544, + "rewards/margins": 0.2879236340522766, + "rewards/rejected": -0.3683873414993286, + "step": 3170 + }, + { + "epoch": 0.61, + "learning_rate": 2.0168602055111175e-06, + "logits/chosen": -1.5395227670669556, + "logits/rejected": -1.046355962753296, + "logps/chosen": -564.5604248046875, + "logps/rejected": -1171.841064453125, + "loss": 0.0757, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.10129845142364502, + "rewards/margins": 0.29600051045417786, + "rewards/rejected": -0.3972989022731781, + "step": 3180 + }, + { + "epoch": 0.61, + "learning_rate": 2.0005621765142942e-06, + "logits/chosen": -1.4990097284317017, + "logits/rejected": -0.953599750995636, + "logps/chosen": -579.7748413085938, + "logps/rejected": -1242.611572265625, + "loss": 0.0364, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08316659927368164, + "rewards/margins": 0.32868489623069763, + "rewards/rejected": -0.4118514657020569, + "step": 3190 + }, + { + "epoch": 0.61, + "learning_rate": 1.9842862263420565e-06, + "logits/chosen": -1.3561508655548096, + "logits/rejected": -0.8739617466926575, + "logps/chosen": -579.7095947265625, + "logps/rejected": -1226.96728515625, + "loss": 0.0817, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10601375252008438, + "rewards/margins": 0.302290141582489, + "rewards/rejected": -0.4083038866519928, + "step": 3200 + }, + { + "epoch": 0.61, + "learning_rate": 1.9680330745110954e-06, + "logits/chosen": -1.5132472515106201, + "logits/rejected": -0.9904989004135132, + "logps/chosen": -658.9536743164062, + "logps/rejected": -1169.8291015625, + "loss": 0.0817, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.1283619999885559, + "rewards/margins": 0.24815258383750916, + "rewards/rejected": -0.37651458382606506, + "step": 3210 + }, + { + "epoch": 0.61, + "learning_rate": 1.9518034395302413e-06, + "logits/chosen": -1.6865606307983398, + "logits/rejected": -0.724189281463623, + "logps/chosen": -647.4058837890625, + "logps/rejected": -1245.415771484375, + "loss": 0.0529, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.09649708122015, + "rewards/margins": 0.3382863402366638, + "rewards/rejected": -0.434783399105072, + "step": 3220 + }, + { + "epoch": 0.62, + "learning_rate": 1.9355980388687145e-06, + "logits/chosen": -1.61764657497406, + "logits/rejected": -0.9387199282646179, + "logps/chosen": -669.2235107421875, + "logps/rejected": -1221.201416015625, + "loss": 0.0648, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.13963374495506287, + "rewards/margins": 0.3037804663181305, + "rewards/rejected": -0.44341421127319336, + "step": 3230 + }, + { + "epoch": 0.62, + "learning_rate": 1.9194175889243942e-06, + "logits/chosen": -1.5610865354537964, + "logits/rejected": -0.8656753301620483, + "logps/chosen": -716.8016967773438, + "logps/rejected": -1312.772216796875, + "loss": 0.0645, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.15120446681976318, + "rewards/margins": 0.3280597925186157, + "rewards/rejected": -0.4792643189430237, + "step": 3240 + }, + { + "epoch": 0.62, + "learning_rate": 1.903262804992156e-06, + "logits/chosen": -1.409865140914917, + "logits/rejected": -0.7154837846755981, + "logps/chosen": -558.9967651367188, + "logps/rejected": -1203.494873046875, + "loss": 0.0566, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.09356357902288437, + "rewards/margins": 0.32224196195602417, + "rewards/rejected": -0.41580551862716675, + "step": 3250 + }, + { + "epoch": 0.62, + "learning_rate": 1.8871344012322504e-06, + "logits/chosen": -1.3692007064819336, + "logits/rejected": -0.7519701719284058, + "logps/chosen": -581.4006958007812, + "logps/rejected": -1174.0390625, + "loss": 0.0703, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.10192625224590302, + "rewards/margins": 0.308377206325531, + "rewards/rejected": -0.4103034436702728, + "step": 3260 + }, + { + "epoch": 0.62, + "learning_rate": 1.8710330906387288e-06, + "logits/chosen": -1.7411953210830688, + "logits/rejected": -1.087471842765808, + "logps/chosen": -503.864501953125, + "logps/rejected": -1072.919677734375, + "loss": 0.1001, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09860959649085999, + "rewards/margins": 0.25538548827171326, + "rewards/rejected": -0.35399508476257324, + "step": 3270 + }, + { + "epoch": 0.62, + "learning_rate": 1.8549595850079272e-06, + "logits/chosen": -1.6818044185638428, + "logits/rejected": -1.2432438135147095, + "logps/chosen": -630.8226318359375, + "logps/rejected": -1347.3629150390625, + "loss": 0.0867, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12099182605743408, + "rewards/margins": 0.32913532853126526, + "rewards/rejected": -0.45012718439102173, + "step": 3280 + }, + { + "epoch": 0.63, + "learning_rate": 1.8389145949069953e-06, + "logits/chosen": -1.6793514490127563, + "logits/rejected": -1.0356289148330688, + "logps/chosen": -653.2823486328125, + "logps/rejected": -1186.5311279296875, + "loss": 0.1233, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14252005517482758, + "rewards/margins": 0.24804458022117615, + "rewards/rejected": -0.3905646502971649, + "step": 3290 + }, + { + "epoch": 0.63, + "learning_rate": 1.8228988296424877e-06, + "logits/chosen": -1.4436924457550049, + "logits/rejected": -1.0423951148986816, + "logps/chosen": -534.2002563476562, + "logps/rejected": -1150.822509765625, + "loss": 0.0885, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.08847598731517792, + "rewards/margins": 0.2850678563117981, + "rewards/rejected": -0.3735438287258148, + "step": 3300 + }, + { + "epoch": 0.63, + "learning_rate": 1.806912997229008e-06, + "logits/chosen": -1.5583360195159912, + "logits/rejected": -0.9240388870239258, + "logps/chosen": -547.8662109375, + "logps/rejected": -1232.9605712890625, + "loss": 0.0587, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.09903976321220398, + "rewards/margins": 0.3067070543766022, + "rewards/rejected": -0.40574678778648376, + "step": 3310 + }, + { + "epoch": 0.63, + "learning_rate": 1.7909578043579037e-06, + "logits/chosen": -1.6759271621704102, + "logits/rejected": -1.00883150100708, + "logps/chosen": -617.1407470703125, + "logps/rejected": -1213.87109375, + "loss": 0.0595, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10988689959049225, + "rewards/margins": 0.28930503129959106, + "rewards/rejected": -0.39919185638427734, + "step": 3320 + }, + { + "epoch": 0.63, + "learning_rate": 1.7750339563660346e-06, + "logits/chosen": -1.8040192127227783, + "logits/rejected": -1.1109973192214966, + "logps/chosen": -591.2498168945312, + "logps/rejected": -1209.500732421875, + "loss": 0.0738, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.09945808351039886, + "rewards/margins": 0.2953023612499237, + "rewards/rejected": -0.39476045966148376, + "step": 3330 + }, + { + "epoch": 0.64, + "learning_rate": 1.759142157204583e-06, + "logits/chosen": -1.4551546573638916, + "logits/rejected": -1.2808643579483032, + "logps/chosen": -430.88592529296875, + "logps/rejected": -990.6643676757812, + "loss": 0.1033, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.07554015517234802, + "rewards/margins": 0.23506391048431396, + "rewards/rejected": -0.3106040358543396, + "step": 3340 + }, + { + "epoch": 0.64, + "learning_rate": 1.7432831094079357e-06, + "logits/chosen": -1.647020936012268, + "logits/rejected": -0.9831321835517883, + "logps/chosen": -474.423583984375, + "logps/rejected": -1153.595947265625, + "loss": 0.0561, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.05415312573313713, + "rewards/margins": 0.315816193819046, + "rewards/rejected": -0.36996930837631226, + "step": 3350 + }, + { + "epoch": 0.64, + "learning_rate": 1.7274575140626318e-06, + "logits/chosen": -1.8123953342437744, + "logits/rejected": -1.0507439374923706, + "logps/chosen": -510.15374755859375, + "logps/rejected": -1266.872802734375, + "loss": 0.0711, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.04254557937383652, + "rewards/margins": 0.3346264958381653, + "rewards/rejected": -0.37717205286026, + "step": 3360 + }, + { + "epoch": 0.64, + "learning_rate": 1.7116660707763637e-06, + "logits/chosen": -1.4582722187042236, + "logits/rejected": -0.9082058668136597, + "logps/chosen": -547.4808959960938, + "logps/rejected": -1253.8936767578125, + "loss": 0.0398, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.05783357471227646, + "rewards/margins": 0.33342987298965454, + "rewards/rejected": -0.3912634551525116, + "step": 3370 + }, + { + "epoch": 0.64, + "learning_rate": 1.695909477647054e-06, + "logits/chosen": -1.4480509757995605, + "logits/rejected": -0.9746176600456238, + "logps/chosen": -525.312255859375, + "logps/rejected": -1169.0672607421875, + "loss": 0.0675, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.07152949273586273, + "rewards/margins": 0.29411083459854126, + "rewards/rejected": -0.36564040184020996, + "step": 3380 + }, + { + "epoch": 0.65, + "learning_rate": 1.6801884312319893e-06, + "logits/chosen": -1.4334437847137451, + "logits/rejected": -0.9882246255874634, + "logps/chosen": -490.239013671875, + "logps/rejected": -1054.856201171875, + "loss": 0.0758, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.033695705235004425, + "rewards/margins": 0.2827971577644348, + "rewards/rejected": -0.31649279594421387, + "step": 3390 + }, + { + "epoch": 0.65, + "learning_rate": 1.6645036265170314e-06, + "logits/chosen": -1.5410234928131104, + "logits/rejected": -0.8343310356140137, + "logps/chosen": -517.603759765625, + "logps/rejected": -1253.4979248046875, + "loss": 0.0561, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.07378663122653961, + "rewards/margins": 0.3355935215950012, + "rewards/rejected": -0.40938013792037964, + "step": 3400 + }, + { + "epoch": 0.65, + "learning_rate": 1.648855756885893e-06, + "logits/chosen": -1.642655611038208, + "logits/rejected": -1.2012007236480713, + "logps/chosen": -450.128173828125, + "logps/rejected": -1080.1187744140625, + "loss": 0.0879, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.05434330180287361, + "rewards/margins": 0.27073508501052856, + "rewards/rejected": -0.3250783383846283, + "step": 3410 + }, + { + "epoch": 0.65, + "learning_rate": 1.633245514089482e-06, + "logits/chosen": -1.535819411277771, + "logits/rejected": -0.8469101190567017, + "logps/chosen": -515.0177612304688, + "logps/rejected": -1162.944091796875, + "loss": 0.0711, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.08549628406763077, + "rewards/margins": 0.2991836369037628, + "rewards/rejected": -0.3846798837184906, + "step": 3420 + }, + { + "epoch": 0.65, + "learning_rate": 1.6176735882153284e-06, + "logits/chosen": -1.6216903924942017, + "logits/rejected": -0.785152792930603, + "logps/chosen": -591.2189331054688, + "logps/rejected": -1124.4949951171875, + "loss": 0.0772, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.11196261644363403, + "rewards/margins": 0.2648715376853943, + "rewards/rejected": -0.37683412432670593, + "step": 3430 + }, + { + "epoch": 0.66, + "learning_rate": 1.6021406676570667e-06, + "logits/chosen": -1.3090717792510986, + "logits/rejected": -0.9269634485244751, + "logps/chosen": -579.5616455078125, + "logps/rejected": -1204.068359375, + "loss": 0.0947, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14344973862171173, + "rewards/margins": 0.2711476683616638, + "rewards/rejected": -0.41459742188453674, + "step": 3440 + }, + { + "epoch": 0.66, + "learning_rate": 1.5866474390840126e-06, + "logits/chosen": -1.6060562133789062, + "logits/rejected": -1.0789166688919067, + "logps/chosen": -631.2396240234375, + "logps/rejected": -1278.0540771484375, + "loss": 0.065, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12369465827941895, + "rewards/margins": 0.3081647455692291, + "rewards/rejected": -0.43185940384864807, + "step": 3450 + }, + { + "epoch": 0.66, + "learning_rate": 1.5711945874108053e-06, + "logits/chosen": -1.5090563297271729, + "logits/rejected": -0.8889325261116028, + "logps/chosen": -547.3897705078125, + "logps/rejected": -1296.255859375, + "loss": 0.1008, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1164555549621582, + "rewards/margins": 0.3054220378398895, + "rewards/rejected": -0.4218776226043701, + "step": 3460 + }, + { + "epoch": 0.66, + "learning_rate": 1.5557827957671249e-06, + "logits/chosen": -1.4225951433181763, + "logits/rejected": -0.8890473246574402, + "logps/chosen": -510.88232421875, + "logps/rejected": -1235.585205078125, + "loss": 0.0621, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09390435367822647, + "rewards/margins": 0.33430469036102295, + "rewards/rejected": -0.4282090663909912, + "step": 3470 + }, + { + "epoch": 0.66, + "learning_rate": 1.5404127454674994e-06, + "logits/chosen": -1.4549505710601807, + "logits/rejected": -0.9507797360420227, + "logps/chosen": -447.9923400878906, + "logps/rejected": -1089.358642578125, + "loss": 0.0869, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.08430926501750946, + "rewards/margins": 0.2995262145996094, + "rewards/rejected": -0.38383546471595764, + "step": 3480 + }, + { + "epoch": 0.66, + "learning_rate": 1.5250851159811809e-06, + "logits/chosen": -1.4489725828170776, + "logits/rejected": -0.8240424394607544, + "logps/chosen": -536.1524658203125, + "logps/rejected": -1204.070068359375, + "loss": 0.0628, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.09031540900468826, + "rewards/margins": 0.32029175758361816, + "rewards/rejected": -0.41060715913772583, + "step": 3490 + }, + { + "epoch": 0.67, + "learning_rate": 1.509800584902108e-06, + "logits/chosen": -1.6435320377349854, + "logits/rejected": -0.9851503372192383, + "logps/chosen": -394.79254150390625, + "logps/rejected": -1075.1129150390625, + "loss": 0.0555, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.05022016167640686, + "rewards/margins": 0.3278888165950775, + "rewards/rejected": -0.37810903787612915, + "step": 3500 + }, + { + "epoch": 0.67, + "learning_rate": 1.4945598279189565e-06, + "logits/chosen": -1.6852319240570068, + "logits/rejected": -0.7762208580970764, + "logps/chosen": -584.0240478515625, + "logps/rejected": -1283.089599609375, + "loss": 0.0393, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.07786907255649567, + "rewards/margins": 0.36127179861068726, + "rewards/rejected": -0.4391408860683441, + "step": 3510 + }, + { + "epoch": 0.67, + "learning_rate": 1.4793635187852622e-06, + "logits/chosen": -1.5712199211120605, + "logits/rejected": -0.8487402200698853, + "logps/chosen": -625.174560546875, + "logps/rejected": -1259.09912109375, + "loss": 0.0649, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1212628111243248, + "rewards/margins": 0.28756266832351685, + "rewards/rejected": -0.40882548689842224, + "step": 3520 + }, + { + "epoch": 0.67, + "learning_rate": 1.4642123292896406e-06, + "logits/chosen": -1.7730987071990967, + "logits/rejected": -1.187359094619751, + "logps/chosen": -506.89727783203125, + "logps/rejected": -1099.273193359375, + "loss": 0.0719, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.055269528180360794, + "rewards/margins": 0.2945236563682556, + "rewards/rejected": -0.3497931659221649, + "step": 3530 + }, + { + "epoch": 0.67, + "learning_rate": 1.4491069292260867e-06, + "logits/chosen": -1.5588057041168213, + "logits/rejected": -0.9192155599594116, + "logps/chosen": -591.0391845703125, + "logps/rejected": -1198.16015625, + "loss": 0.0574, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.08599583059549332, + "rewards/margins": 0.29813671112060547, + "rewards/rejected": -0.3841325342655182, + "step": 3540 + }, + { + "epoch": 0.68, + "learning_rate": 1.4340479863643658e-06, + "logits/chosen": -1.5649362802505493, + "logits/rejected": -1.0527899265289307, + "logps/chosen": -587.0765380859375, + "logps/rejected": -1299.611572265625, + "loss": 0.0652, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.11504580825567245, + "rewards/margins": 0.3227574825286865, + "rewards/rejected": -0.43780332803726196, + "step": 3550 + }, + { + "epoch": 0.68, + "learning_rate": 1.4190361664204936e-06, + "logits/chosen": -1.5563275814056396, + "logits/rejected": -1.1789706945419312, + "logps/chosen": -463.9998474121094, + "logps/rejected": -1109.9326171875, + "loss": 0.0852, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.05924314260482788, + "rewards/margins": 0.2901732325553894, + "rewards/rejected": -0.3494163751602173, + "step": 3560 + }, + { + "epoch": 0.68, + "learning_rate": 1.4040721330273063e-06, + "logits/chosen": -1.5990869998931885, + "logits/rejected": -0.8658841252326965, + "logps/chosen": -562.2022705078125, + "logps/rejected": -1232.999755859375, + "loss": 0.0502, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.06985044479370117, + "rewards/margins": 0.32169193029403687, + "rewards/rejected": -0.39154237508773804, + "step": 3570 + }, + { + "epoch": 0.68, + "learning_rate": 1.3891565477051242e-06, + "logits/chosen": -1.259183645248413, + "logits/rejected": -1.056996464729309, + "logps/chosen": -360.9638671875, + "logps/rejected": -916.8385620117188, + "loss": 0.0722, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.060729265213012695, + "rewards/margins": 0.2513827681541443, + "rewards/rejected": -0.312112033367157, + "step": 3580 + }, + { + "epoch": 0.68, + "learning_rate": 1.3742900698325034e-06, + "logits/chosen": -1.6714311838150024, + "logits/rejected": -0.8850634694099426, + "logps/chosen": -618.9185791015625, + "logps/rejected": -1252.8792724609375, + "loss": 0.0481, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.13342010974884033, + "rewards/margins": 0.3281671404838562, + "rewards/rejected": -0.46158725023269653, + "step": 3590 + }, + { + "epoch": 0.69, + "learning_rate": 1.3594733566170925e-06, + "logits/chosen": -1.5407627820968628, + "logits/rejected": -1.0272929668426514, + "logps/chosen": -562.9507446289062, + "logps/rejected": -1212.2821044921875, + "loss": 0.0512, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11579425632953644, + "rewards/margins": 0.3113517165184021, + "rewards/rejected": -0.42714595794677734, + "step": 3600 + }, + { + "epoch": 0.69, + "learning_rate": 1.3447070630665771e-06, + "logits/chosen": -1.5532509088516235, + "logits/rejected": -1.0339925289154053, + "logps/chosen": -618.67626953125, + "logps/rejected": -1198.66796875, + "loss": 0.075, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.12184294313192368, + "rewards/margins": 0.2842368483543396, + "rewards/rejected": -0.40607982873916626, + "step": 3610 + }, + { + "epoch": 0.69, + "learning_rate": 1.329991841959717e-06, + "logits/chosen": -1.4766697883605957, + "logits/rejected": -1.0048980712890625, + "logps/chosen": -446.8675231933594, + "logps/rejected": -1180.1754150390625, + "loss": 0.0837, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.058239441365003586, + "rewards/margins": 0.3347640931606293, + "rewards/rejected": -0.39300355315208435, + "step": 3620 + }, + { + "epoch": 0.69, + "learning_rate": 1.3153283438175036e-06, + "logits/chosen": -1.6488538980484009, + "logits/rejected": -1.1211696863174438, + "logps/chosen": -476.58233642578125, + "logps/rejected": -1119.6107177734375, + "loss": 0.0622, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08521339297294617, + "rewards/margins": 0.2965385317802429, + "rewards/rejected": -0.3817519545555115, + "step": 3630 + }, + { + "epoch": 0.69, + "learning_rate": 1.3007172168743854e-06, + "logits/chosen": -1.5866855382919312, + "logits/rejected": -1.1675742864608765, + "logps/chosen": -620.083740234375, + "logps/rejected": -1222.1119384765625, + "loss": 0.0785, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11164329200983047, + "rewards/margins": 0.2959319055080414, + "rewards/rejected": -0.40757519006729126, + "step": 3640 + }, + { + "epoch": 0.7, + "learning_rate": 1.2861591070496193e-06, + "logits/chosen": -1.6165597438812256, + "logits/rejected": -0.9629543423652649, + "logps/chosen": -572.568359375, + "logps/rejected": -1131.6605224609375, + "loss": 0.0532, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08107302337884903, + "rewards/margins": 0.28896981477737427, + "rewards/rejected": -0.3700428009033203, + "step": 3650 + }, + { + "epoch": 0.7, + "learning_rate": 1.271654657918722e-06, + "logits/chosen": -1.4744112491607666, + "logits/rejected": -1.0718681812286377, + "logps/chosen": -544.6527099609375, + "logps/rejected": -1227.4991455078125, + "loss": 0.0542, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.1165962815284729, + "rewards/margins": 0.28793099522590637, + "rewards/rejected": -0.4045272767543793, + "step": 3660 + }, + { + "epoch": 0.7, + "learning_rate": 1.2572045106850051e-06, + "logits/chosen": -1.5404466390609741, + "logits/rejected": -1.0062000751495361, + "logps/chosen": -482.5479431152344, + "logps/rejected": -1285.21142578125, + "loss": 0.057, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.0981147438287735, + "rewards/margins": 0.3251039981842041, + "rewards/rejected": -0.4232187867164612, + "step": 3670 + }, + { + "epoch": 0.7, + "learning_rate": 1.2428093041512418e-06, + "logits/chosen": -1.6958799362182617, + "logits/rejected": -1.104161024093628, + "logps/chosen": -482.8475646972656, + "logps/rejected": -1170.7442626953125, + "loss": 0.072, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.08815275132656097, + "rewards/margins": 0.3066135048866272, + "rewards/rejected": -0.3947662115097046, + "step": 3680 + }, + { + "epoch": 0.7, + "learning_rate": 1.2284696746914216e-06, + "logits/chosen": -1.5207383632659912, + "logits/rejected": -1.0584545135498047, + "logps/chosen": -582.7368774414062, + "logps/rejected": -1198.0496826171875, + "loss": 0.0751, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.12373187392950058, + "rewards/margins": 0.27462083101272583, + "rewards/rejected": -0.3983527421951294, + "step": 3690 + }, + { + "epoch": 0.7, + "learning_rate": 1.2141862562226164e-06, + "logits/chosen": -1.727847695350647, + "logits/rejected": -0.9943065643310547, + "logps/chosen": -509.9764709472656, + "logps/rejected": -1151.262451171875, + "loss": 0.0466, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.05588286370038986, + "rewards/margins": 0.31229880452156067, + "rewards/rejected": -0.36818164587020874, + "step": 3700 + }, + { + "epoch": 0.71, + "learning_rate": 1.1999596801769617e-06, + "logits/chosen": -1.8949705362319946, + "logits/rejected": -0.9455671310424805, + "logps/chosen": -665.4451904296875, + "logps/rejected": -1261.169677734375, + "loss": 0.0799, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1065397709608078, + "rewards/margins": 0.3085806965827942, + "rewards/rejected": -0.41512051224708557, + "step": 3710 + }, + { + "epoch": 0.71, + "learning_rate": 1.185790575473738e-06, + "logits/chosen": -1.275059461593628, + "logits/rejected": -0.8686116933822632, + "logps/chosen": -545.46044921875, + "logps/rejected": -1306.507568359375, + "loss": 0.0753, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.09290392696857452, + "rewards/margins": 0.314125120639801, + "rewards/rejected": -0.40702906250953674, + "step": 3720 + }, + { + "epoch": 0.71, + "learning_rate": 1.1716795684915728e-06, + "logits/chosen": -1.5622496604919434, + "logits/rejected": -1.0755438804626465, + "logps/chosen": -472.22412109375, + "logps/rejected": -1130.0338134765625, + "loss": 0.0626, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.05678543448448181, + "rewards/margins": 0.31124743819236755, + "rewards/rejected": -0.36803287267684937, + "step": 3730 + }, + { + "epoch": 0.71, + "learning_rate": 1.1576272830407418e-06, + "logits/chosen": -1.4323136806488037, + "logits/rejected": -0.9573151469230652, + "logps/chosen": -519.433837890625, + "logps/rejected": -1087.538818359375, + "loss": 0.1109, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09503252804279327, + "rewards/margins": 0.2525123953819275, + "rewards/rejected": -0.34754490852355957, + "step": 3740 + }, + { + "epoch": 0.71, + "learning_rate": 1.1436343403356019e-06, + "logits/chosen": -1.5816457271575928, + "logits/rejected": -1.0379760265350342, + "logps/chosen": -583.7271728515625, + "logps/rejected": -1389.2052001953125, + "loss": 0.0636, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.08136254549026489, + "rewards/margins": 0.3822881281375885, + "rewards/rejected": -0.4636506140232086, + "step": 3750 + }, + { + "epoch": 0.72, + "learning_rate": 1.129701358967123e-06, + "logits/chosen": -1.5716511011123657, + "logits/rejected": -0.927304744720459, + "logps/chosen": -579.1673583984375, + "logps/rejected": -1177.848876953125, + "loss": 0.0707, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.0907040387392044, + "rewards/margins": 0.2821573317050934, + "rewards/rejected": -0.372861385345459, + "step": 3760 + }, + { + "epoch": 0.72, + "learning_rate": 1.11582895487554e-06, + "logits/chosen": -1.665675401687622, + "logits/rejected": -1.0130513906478882, + "logps/chosen": -493.2945861816406, + "logps/rejected": -1227.0123291015625, + "loss": 0.0586, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.05040637403726578, + "rewards/margins": 0.33554786443710327, + "rewards/rejected": -0.38595423102378845, + "step": 3770 + }, + { + "epoch": 0.72, + "learning_rate": 1.1020177413231334e-06, + "logits/chosen": -1.416183352470398, + "logits/rejected": -1.0358549356460571, + "logps/chosen": -512.5576782226562, + "logps/rejected": -1100.0, + "loss": 0.0715, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.10818634927272797, + "rewards/margins": 0.26378339529037476, + "rewards/rejected": -0.3719697594642639, + "step": 3780 + }, + { + "epoch": 0.72, + "learning_rate": 1.0882683288671041e-06, + "logits/chosen": -1.4680287837982178, + "logits/rejected": -0.9518529176712036, + "logps/chosen": -548.0827026367188, + "logps/rejected": -1328.7269287109375, + "loss": 0.0503, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.07367765158414841, + "rewards/margins": 0.3535308241844177, + "rewards/rejected": -0.4272085130214691, + "step": 3790 + }, + { + "epoch": 0.72, + "learning_rate": 1.0745813253325957e-06, + "logits/chosen": -1.6463426351547241, + "logits/rejected": -0.9568156003952026, + "logps/chosen": -647.0377197265625, + "logps/rejected": -1093.1588134765625, + "loss": 0.0765, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.09032820910215378, + "rewards/margins": 0.251170814037323, + "rewards/rejected": -0.3414990305900574, + "step": 3800 + }, + { + "epoch": 0.73, + "learning_rate": 1.0609573357858166e-06, + "logits/chosen": -1.671026587486267, + "logits/rejected": -1.1713273525238037, + "logps/chosen": -475.3999938964844, + "logps/rejected": -1094.6204833984375, + "loss": 0.077, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.07541962713003159, + "rewards/margins": 0.2850693166255951, + "rewards/rejected": -0.3604889512062073, + "step": 3810 + }, + { + "epoch": 0.73, + "learning_rate": 1.0473969625072922e-06, + "logits/chosen": -1.8827602863311768, + "logits/rejected": -1.1715481281280518, + "logps/chosen": -581.587890625, + "logps/rejected": -1301.4676513671875, + "loss": 0.0463, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.09528975188732147, + "rewards/margins": 0.347636878490448, + "rewards/rejected": -0.44292664527893066, + "step": 3820 + }, + { + "epoch": 0.73, + "learning_rate": 1.0339008049652427e-06, + "logits/chosen": -1.6441001892089844, + "logits/rejected": -0.9791193008422852, + "logps/chosen": -703.3151245117188, + "logps/rejected": -1367.9464111328125, + "loss": 0.0797, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.14000825583934784, + "rewards/margins": 0.32136717438697815, + "rewards/rejected": -0.46137547492980957, + "step": 3830 + }, + { + "epoch": 0.73, + "learning_rate": 1.0204694597890814e-06, + "logits/chosen": -1.3856605291366577, + "logits/rejected": -0.8451806306838989, + "logps/chosen": -538.6572875976562, + "logps/rejected": -1143.777587890625, + "loss": 0.0825, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.12030009925365448, + "rewards/margins": 0.28403720259666443, + "rewards/rejected": -0.4043373167514801, + "step": 3840 + }, + { + "epoch": 0.73, + "learning_rate": 1.0071035207430352e-06, + "logits/chosen": -1.4096628427505493, + "logits/rejected": -0.9546631574630737, + "logps/chosen": -471.92523193359375, + "logps/rejected": -1200.522216796875, + "loss": 0.0552, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.10387523472309113, + "rewards/margins": 0.31556516885757446, + "rewards/rejected": -0.419440358877182, + "step": 3850 + }, + { + "epoch": 0.74, + "learning_rate": 9.938035786999018e-07, + "logits/chosen": -1.5193308591842651, + "logits/rejected": -0.9365988969802856, + "logps/chosen": -572.107421875, + "logps/rejected": -1061.31689453125, + "loss": 0.1134, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.11436333507299423, + "rewards/margins": 0.2368105947971344, + "rewards/rejected": -0.35117393732070923, + "step": 3860 + }, + { + "epoch": 0.74, + "learning_rate": 9.805702216149252e-07, + "logits/chosen": -1.5686334371566772, + "logits/rejected": -0.9215704202651978, + "logps/chosen": -517.8717041015625, + "logps/rejected": -1208.834228515625, + "loss": 0.0644, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.09085571765899658, + "rewards/margins": 0.3140665292739868, + "rewards/rejected": -0.4049221873283386, + "step": 3870 + }, + { + "epoch": 0.74, + "learning_rate": 9.674040344998056e-07, + "logits/chosen": -1.3506265878677368, + "logits/rejected": -0.9446635246276855, + "logps/chosen": -512.5413818359375, + "logps/rejected": -1294.516357421875, + "loss": 0.0453, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12554454803466797, + "rewards/margins": 0.3319617509841919, + "rewards/rejected": -0.45750635862350464, + "step": 3880 + }, + { + "epoch": 0.74, + "learning_rate": 9.543055993968339e-07, + "logits/chosen": -1.6407482624053955, + "logits/rejected": -1.1494085788726807, + "logps/chosen": -494.1866149902344, + "logps/rejected": -1177.568115234375, + "loss": 0.0758, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.11050983518362045, + "rewards/margins": 0.3092818558216095, + "rewards/rejected": -0.41979169845581055, + "step": 3890 + }, + { + "epoch": 0.74, + "learning_rate": 9.412754953531664e-07, + "logits/chosen": -1.5899689197540283, + "logits/rejected": -0.9618569612503052, + "logps/chosen": -592.8374633789062, + "logps/rejected": -1161.0634765625, + "loss": 0.0644, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10876324027776718, + "rewards/margins": 0.295836865901947, + "rewards/rejected": -0.40460005402565, + "step": 3900 + }, + { + "epoch": 0.74, + "learning_rate": 9.283142983952231e-07, + "logits/chosen": -1.3185956478118896, + "logits/rejected": -0.7824636101722717, + "logps/chosen": -538.34912109375, + "logps/rejected": -1115.861328125, + "loss": 0.0765, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.14305037260055542, + "rewards/margins": 0.2506643235683441, + "rewards/rejected": -0.3937147259712219, + "step": 3910 + }, + { + "epoch": 0.75, + "learning_rate": 9.154225815032242e-07, + "logits/chosen": -1.5346519947052002, + "logits/rejected": -0.8861868977546692, + "logps/chosen": -551.1466064453125, + "logps/rejected": -1097.8878173828125, + "loss": 0.0767, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.10631624609231949, + "rewards/margins": 0.27936822175979614, + "rewards/rejected": -0.38568446040153503, + "step": 3920 + }, + { + "epoch": 0.75, + "learning_rate": 9.026009145858608e-07, + "logits/chosen": -1.7078043222427368, + "logits/rejected": -0.961190402507782, + "logps/chosen": -557.4398193359375, + "logps/rejected": -1204.511474609375, + "loss": 0.054, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10399410873651505, + "rewards/margins": 0.31865912675857544, + "rewards/rejected": -0.4226532578468323, + "step": 3930 + }, + { + "epoch": 0.75, + "learning_rate": 8.898498644550973e-07, + "logits/chosen": -1.577141284942627, + "logits/rejected": -0.9125620722770691, + "logps/chosen": -578.0216064453125, + "logps/rejected": -1236.27392578125, + "loss": 0.0674, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1092778891324997, + "rewards/margins": 0.3120391368865967, + "rewards/rejected": -0.4213170111179352, + "step": 3940 + }, + { + "epoch": 0.75, + "learning_rate": 8.771699948011203e-07, + "logits/chosen": -1.5836879014968872, + "logits/rejected": -1.0778744220733643, + "logps/chosen": -575.2800903320312, + "logps/rejected": -1225.6197509765625, + "loss": 0.0894, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.10885939747095108, + "rewards/margins": 0.3017827272415161, + "rewards/rejected": -0.410642147064209, + "step": 3950 + }, + { + "epoch": 0.75, + "learning_rate": 8.645618661674144e-07, + "logits/chosen": -1.499451994895935, + "logits/rejected": -0.9791691899299622, + "logps/chosen": -474.6846618652344, + "logps/rejected": -1006.2005004882812, + "loss": 0.1105, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.0962987095117569, + "rewards/margins": 0.2620392441749573, + "rewards/rejected": -0.35833796858787537, + "step": 3960 + }, + { + "epoch": 0.76, + "learning_rate": 8.520260359259822e-07, + "logits/chosen": -1.5831758975982666, + "logits/rejected": -1.1838436126708984, + "logps/chosen": -551.3702392578125, + "logps/rejected": -1213.033447265625, + "loss": 0.1138, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.106439970433712, + "rewards/margins": 0.28389599919319153, + "rewards/rejected": -0.39033594727516174, + "step": 3970 + }, + { + "epoch": 0.76, + "learning_rate": 8.395630582527075e-07, + "logits/chosen": -1.674072265625, + "logits/rejected": -0.7699130773544312, + "logps/chosen": -510.9281311035156, + "logps/rejected": -1112.57763671875, + "loss": 0.048, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.07025657594203949, + "rewards/margins": 0.30410030484199524, + "rewards/rejected": -0.37435686588287354, + "step": 3980 + }, + { + "epoch": 0.76, + "learning_rate": 8.271734841028553e-07, + "logits/chosen": -1.3906548023223877, + "logits/rejected": -0.737972617149353, + "logps/chosen": -568.7453002929688, + "logps/rejected": -1248.777099609375, + "loss": 0.0676, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.13117803633213043, + "rewards/margins": 0.30835506319999695, + "rewards/rejected": -0.4395330846309662, + "step": 3990 + }, + { + "epoch": 0.76, + "learning_rate": 8.148578611867114e-07, + "logits/chosen": -1.6215349435806274, + "logits/rejected": -0.7484738230705261, + "logps/chosen": -614.5372314453125, + "logps/rejected": -1331.172607421875, + "loss": 0.0462, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.1255607157945633, + "rewards/margins": 0.3478773832321167, + "rewards/rejected": -0.4734380841255188, + "step": 4000 + }, + { + "epoch": 0.76, + "learning_rate": 8.026167339453792e-07, + "logits/chosen": -1.3855167627334595, + "logits/rejected": -1.0264958143234253, + "logps/chosen": -564.0972900390625, + "logps/rejected": -1141.349609375, + "loss": 0.0905, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.11972753703594208, + "rewards/margins": 0.2558014690876007, + "rewards/rejected": -0.3755289912223816, + "step": 4010 + }, + { + "epoch": 0.77, + "learning_rate": 7.904506435266998e-07, + "logits/chosen": -1.6003910303115845, + "logits/rejected": -1.0422978401184082, + "logps/chosen": -550.9716796875, + "logps/rejected": -1115.030517578125, + "loss": 0.0766, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.11674098670482635, + "rewards/margins": 0.26603561639785767, + "rewards/rejected": -0.3827766180038452, + "step": 4020 + }, + { + "epoch": 0.77, + "learning_rate": 7.783601277613378e-07, + "logits/chosen": -1.427156686782837, + "logits/rejected": -1.0377559661865234, + "logps/chosen": -507.49102783203125, + "logps/rejected": -1206.0267333984375, + "loss": 0.0485, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.12479288876056671, + "rewards/margins": 0.3031744360923767, + "rewards/rejected": -0.42796725034713745, + "step": 4030 + }, + { + "epoch": 0.77, + "learning_rate": 7.66345721139003e-07, + "logits/chosen": -1.3637679815292358, + "logits/rejected": -1.0013728141784668, + "logps/chosen": -511.2125549316406, + "logps/rejected": -1133.815185546875, + "loss": 0.0831, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11282964795827866, + "rewards/margins": 0.2991596758365631, + "rewards/rejected": -0.4119893014431, + "step": 4040 + }, + { + "epoch": 0.77, + "learning_rate": 7.544079547848183e-07, + "logits/chosen": -1.6487575769424438, + "logits/rejected": -1.1513216495513916, + "logps/chosen": -475.4693298339844, + "logps/rejected": -1217.268310546875, + "loss": 0.0694, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.0943184494972229, + "rewards/margins": 0.32787054777145386, + "rewards/rejected": -0.42218899726867676, + "step": 4050 + }, + { + "epoch": 0.77, + "learning_rate": 7.425473564358457e-07, + "logits/chosen": -1.631055235862732, + "logits/rejected": -0.8726965188980103, + "logps/chosen": -664.9032592773438, + "logps/rejected": -1289.385498046875, + "loss": 0.0738, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14752522110939026, + "rewards/margins": 0.3137827515602112, + "rewards/rejected": -0.46130794286727905, + "step": 4060 + }, + { + "epoch": 0.78, + "learning_rate": 7.307644504177539e-07, + "logits/chosen": -1.548463225364685, + "logits/rejected": -1.0570564270019531, + "logps/chosen": -530.0663452148438, + "logps/rejected": -1171.269775390625, + "loss": 0.0673, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1084066778421402, + "rewards/margins": 0.29972022771835327, + "rewards/rejected": -0.40812692046165466, + "step": 4070 + }, + { + "epoch": 0.78, + "learning_rate": 7.190597576216385e-07, + "logits/chosen": -1.524394154548645, + "logits/rejected": -0.9965178370475769, + "logps/chosen": -584.171142578125, + "logps/rejected": -1225.20654296875, + "loss": 0.0845, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.13969434797763824, + "rewards/margins": 0.2867378890514374, + "rewards/rejected": -0.4264322817325592, + "step": 4080 + }, + { + "epoch": 0.78, + "learning_rate": 7.074337954809945e-07, + "logits/chosen": -1.565197229385376, + "logits/rejected": -1.1793510913848877, + "logps/chosen": -513.2730712890625, + "logps/rejected": -1081.368408203125, + "loss": 0.0747, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.11965713649988174, + "rewards/margins": 0.256893515586853, + "rewards/rejected": -0.3765506446361542, + "step": 4090 + }, + { + "epoch": 0.78, + "learning_rate": 6.958870779488447e-07, + "logits/chosen": -1.3191499710083008, + "logits/rejected": -1.0770232677459717, + "logps/chosen": -588.6995849609375, + "logps/rejected": -1273.339111328125, + "loss": 0.0705, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.13210487365722656, + "rewards/margins": 0.285585880279541, + "rewards/rejected": -0.4176907539367676, + "step": 4100 + }, + { + "epoch": 0.78, + "learning_rate": 6.844201154750176e-07, + "logits/chosen": -1.632425308227539, + "logits/rejected": -1.1742222309112549, + "logps/chosen": -581.7577514648438, + "logps/rejected": -1191.5350341796875, + "loss": 0.0732, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.09745045006275177, + "rewards/margins": 0.3215068578720093, + "rewards/rejected": -0.41895729303359985, + "step": 4110 + }, + { + "epoch": 0.78, + "learning_rate": 6.730334149835788e-07, + "logits/chosen": -1.598053216934204, + "logits/rejected": -1.1327468156814575, + "logps/chosen": -611.4447631835938, + "logps/rejected": -1131.943115234375, + "loss": 0.0696, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.11202128231525421, + "rewards/margins": 0.2611934244632721, + "rewards/rejected": -0.3732147216796875, + "step": 4120 + }, + { + "epoch": 0.79, + "learning_rate": 6.617274798504286e-07, + "logits/chosen": -1.5097488164901733, + "logits/rejected": -1.096771001815796, + "logps/chosen": -632.8778076171875, + "logps/rejected": -1227.204345703125, + "loss": 0.0667, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09271929413080215, + "rewards/margins": 0.3107157349586487, + "rewards/rejected": -0.4034350514411926, + "step": 4130 + }, + { + "epoch": 0.79, + "learning_rate": 6.505028098810407e-07, + "logits/chosen": -1.4080469608306885, + "logits/rejected": -0.762841522693634, + "logps/chosen": -522.7771606445312, + "logps/rejected": -1254.11181640625, + "loss": 0.0451, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.10984624922275543, + "rewards/margins": 0.32150495052337646, + "rewards/rejected": -0.4313511848449707, + "step": 4140 + }, + { + "epoch": 0.79, + "learning_rate": 6.393599012883709e-07, + "logits/chosen": -1.4583604335784912, + "logits/rejected": -0.8535671234130859, + "logps/chosen": -446.69268798828125, + "logps/rejected": -977.5573120117188, + "loss": 0.0725, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.08450157195329666, + "rewards/margins": 0.2547328472137451, + "rewards/rejected": -0.3392344117164612, + "step": 4150 + }, + { + "epoch": 0.79, + "learning_rate": 6.282992466709247e-07, + "logits/chosen": -1.353318691253662, + "logits/rejected": -0.9638460278511047, + "logps/chosen": -498.1649475097656, + "logps/rejected": -1317.721923828125, + "loss": 0.055, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11576496064662933, + "rewards/margins": 0.34007635712623596, + "rewards/rejected": -0.4558412432670593, + "step": 4160 + }, + { + "epoch": 0.79, + "learning_rate": 6.17321334990973e-07, + "logits/chosen": -1.6091725826263428, + "logits/rejected": -1.1610180139541626, + "logps/chosen": -663.9533081054688, + "logps/rejected": -1272.82470703125, + "loss": 0.1, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.13398803770542145, + "rewards/margins": 0.2794187664985657, + "rewards/rejected": -0.4134067893028259, + "step": 4170 + }, + { + "epoch": 0.8, + "learning_rate": 6.064266515529419e-07, + "logits/chosen": -1.58311927318573, + "logits/rejected": -1.1838868856430054, + "logps/chosen": -530.5707397460938, + "logps/rejected": -1247.7623291015625, + "loss": 0.0641, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1168406754732132, + "rewards/margins": 0.30344682931900024, + "rewards/rejected": -0.42028751969337463, + "step": 4180 + }, + { + "epoch": 0.8, + "learning_rate": 5.956156779819586e-07, + "logits/chosen": -1.5429699420928955, + "logits/rejected": -0.9784320592880249, + "logps/chosen": -561.57958984375, + "logps/rejected": -1228.4150390625, + "loss": 0.0637, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1092168539762497, + "rewards/margins": 0.3057827055454254, + "rewards/rejected": -0.4149995744228363, + "step": 4190 + }, + { + "epoch": 0.8, + "learning_rate": 5.848888922025553e-07, + "logits/chosen": -1.4399769306182861, + "logits/rejected": -1.0240304470062256, + "logps/chosen": -719.931396484375, + "logps/rejected": -1181.3548583984375, + "loss": 0.1283, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.16944709420204163, + "rewards/margins": 0.2121535837650299, + "rewards/rejected": -0.3816007077693939, + "step": 4200 + }, + { + "epoch": 0.8, + "learning_rate": 5.742467684175473e-07, + "logits/chosen": -1.7172958850860596, + "logits/rejected": -0.8663654327392578, + "logps/chosen": -643.3468017578125, + "logps/rejected": -1167.5980224609375, + "loss": 0.0659, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.12012696266174316, + "rewards/margins": 0.28652092814445496, + "rewards/rejected": -0.40664786100387573, + "step": 4210 + }, + { + "epoch": 0.8, + "learning_rate": 5.636897770870667e-07, + "logits/chosen": -1.4587557315826416, + "logits/rejected": -0.9724730253219604, + "logps/chosen": -552.9734497070312, + "logps/rejected": -1146.55322265625, + "loss": 0.0693, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.11410423368215561, + "rewards/margins": 0.2997921407222748, + "rewards/rejected": -0.4138964116573334, + "step": 4220 + }, + { + "epoch": 0.81, + "learning_rate": 5.532183849077651e-07, + "logits/chosen": -1.3316497802734375, + "logits/rejected": -0.9100072979927063, + "logps/chosen": -582.8646240234375, + "logps/rejected": -1303.5687255859375, + "loss": 0.075, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09175514429807663, + "rewards/margins": 0.33713623881340027, + "rewards/rejected": -0.4288913607597351, + "step": 4230 + }, + { + "epoch": 0.81, + "learning_rate": 5.428330547921809e-07, + "logits/chosen": -1.5007743835449219, + "logits/rejected": -0.6980355381965637, + "logps/chosen": -580.4159545898438, + "logps/rejected": -1250.198486328125, + "loss": 0.0566, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09494878351688385, + "rewards/margins": 0.3454148471355438, + "rewards/rejected": -0.44036364555358887, + "step": 4240 + }, + { + "epoch": 0.81, + "learning_rate": 5.32534245848278e-07, + "logits/chosen": -1.7881839275360107, + "logits/rejected": -0.9309795498847961, + "logps/chosen": -604.7741088867188, + "logps/rejected": -1202.27880859375, + "loss": 0.0643, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12172114849090576, + "rewards/margins": 0.2990148663520813, + "rewards/rejected": -0.42073601484298706, + "step": 4250 + }, + { + "epoch": 0.81, + "learning_rate": 5.223224133591475e-07, + "logits/chosen": -1.5164127349853516, + "logits/rejected": -0.9188127517700195, + "logps/chosen": -619.7620849609375, + "logps/rejected": -1258.8397216796875, + "loss": 0.0661, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.10920927673578262, + "rewards/margins": 0.3095013201236725, + "rewards/rejected": -0.4187105596065521, + "step": 4260 + }, + { + "epoch": 0.81, + "learning_rate": 5.121980087628802e-07, + "logits/chosen": -1.653839349746704, + "logits/rejected": -1.0274848937988281, + "logps/chosen": -611.0381469726562, + "logps/rejected": -1282.819091796875, + "loss": 0.0573, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12401090562343597, + "rewards/margins": 0.30650681257247925, + "rewards/rejected": -0.4305177330970764, + "step": 4270 + }, + { + "epoch": 0.82, + "learning_rate": 5.021614796326155e-07, + "logits/chosen": -1.6808589696884155, + "logits/rejected": -1.0335091352462769, + "logps/chosen": -587.5392456054688, + "logps/rejected": -1243.9168701171875, + "loss": 0.0807, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.11875418573617935, + "rewards/margins": 0.32050734758377075, + "rewards/rejected": -0.4392614960670471, + "step": 4280 + }, + { + "epoch": 0.82, + "learning_rate": 4.922132696567463e-07, + "logits/chosen": -1.5289744138717651, + "logits/rejected": -0.8728163838386536, + "logps/chosen": -581.1839599609375, + "logps/rejected": -1340.7869873046875, + "loss": 0.0481, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10356955230236053, + "rewards/margins": 0.3660332262516022, + "rewards/rejected": -0.4696027636528015, + "step": 4290 + }, + { + "epoch": 0.82, + "learning_rate": 4.823538186193097e-07, + "logits/chosen": -1.6917448043823242, + "logits/rejected": -1.0616384744644165, + "logps/chosen": -584.0740966796875, + "logps/rejected": -1156.746826171875, + "loss": 0.0751, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.11137738078832626, + "rewards/margins": 0.2847217619419098, + "rewards/rejected": -0.3960992097854614, + "step": 4300 + }, + { + "epoch": 0.82, + "learning_rate": 4.725835623805494e-07, + "logits/chosen": -1.7799545526504517, + "logits/rejected": -1.0782766342163086, + "logps/chosen": -671.3793334960938, + "logps/rejected": -1264.038330078125, + "loss": 0.0916, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15113036334514618, + "rewards/margins": 0.29365235567092896, + "rewards/rejected": -0.44478267431259155, + "step": 4310 + }, + { + "epoch": 0.82, + "learning_rate": 4.6290293285763816e-07, + "logits/chosen": -1.568554162979126, + "logits/rejected": -1.0866026878356934, + "logps/chosen": -611.8372802734375, + "logps/rejected": -1266.229736328125, + "loss": 0.0709, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.11851924657821655, + "rewards/margins": 0.29408353567123413, + "rewards/rejected": -0.4126027524471283, + "step": 4320 + }, + { + "epoch": 0.82, + "learning_rate": 4.533123580055909e-07, + "logits/chosen": -1.5841357707977295, + "logits/rejected": -0.8760364651679993, + "logps/chosen": -622.0775756835938, + "logps/rejected": -1131.180908203125, + "loss": 0.086, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.1051706075668335, + "rewards/margins": 0.26066774129867554, + "rewards/rejected": -0.36583834886550903, + "step": 4330 + }, + { + "epoch": 0.83, + "learning_rate": 4.438122617983442e-07, + "logits/chosen": -1.7880083322525024, + "logits/rejected": -1.1745917797088623, + "logps/chosen": -556.8238525390625, + "logps/rejected": -1160.031982421875, + "loss": 0.0511, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10341344028711319, + "rewards/margins": 0.3007115423679352, + "rewards/rejected": -0.4041249752044678, + "step": 4340 + }, + { + "epoch": 0.83, + "learning_rate": 4.344030642100133e-07, + "logits/chosen": -1.4846160411834717, + "logits/rejected": -0.7649267315864563, + "logps/chosen": -535.6936645507812, + "logps/rejected": -1125.717041015625, + "loss": 0.0646, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08845969289541245, + "rewards/margins": 0.31220993399620056, + "rewards/rejected": -0.4006696343421936, + "step": 4350 + }, + { + "epoch": 0.83, + "learning_rate": 4.250851811963236e-07, + "logits/chosen": -1.4102054834365845, + "logits/rejected": -0.9203283190727234, + "logps/chosen": -609.0619506835938, + "logps/rejected": -1315.4090576171875, + "loss": 0.0778, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.14391538500785828, + "rewards/margins": 0.2974294126033783, + "rewards/rejected": -0.4413447380065918, + "step": 4360 + }, + { + "epoch": 0.83, + "learning_rate": 4.158590246762278e-07, + "logits/chosen": -1.4081202745437622, + "logits/rejected": -1.04823899269104, + "logps/chosen": -553.0235595703125, + "logps/rejected": -1091.3880615234375, + "loss": 0.0946, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.12480837106704712, + "rewards/margins": 0.2424461394548416, + "rewards/rejected": -0.36725446581840515, + "step": 4370 + }, + { + "epoch": 0.83, + "learning_rate": 4.0672500251369204e-07, + "logits/chosen": -1.7001903057098389, + "logits/rejected": -1.0566041469573975, + "logps/chosen": -509.71173095703125, + "logps/rejected": -1251.2403564453125, + "loss": 0.0554, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09820754826068878, + "rewards/margins": 0.3354089856147766, + "rewards/rejected": -0.4336165487766266, + "step": 4380 + }, + { + "epoch": 0.84, + "learning_rate": 3.976835184996644e-07, + "logits/chosen": -1.4017540216445923, + "logits/rejected": -0.9382025599479675, + "logps/chosen": -563.7012939453125, + "logps/rejected": -1116.6812744140625, + "loss": 0.0802, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.11456887423992157, + "rewards/margins": 0.2591772973537445, + "rewards/rejected": -0.37374621629714966, + "step": 4390 + }, + { + "epoch": 0.84, + "learning_rate": 3.887349723342304e-07, + "logits/chosen": -1.4030841588974, + "logits/rejected": -0.8099882006645203, + "logps/chosen": -509.5604553222656, + "logps/rejected": -1305.5618896484375, + "loss": 0.0575, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08357210457324982, + "rewards/margins": 0.340532124042511, + "rewards/rejected": -0.4241042733192444, + "step": 4400 + }, + { + "epoch": 0.84, + "learning_rate": 3.798797596089351e-07, + "logits/chosen": -1.508704662322998, + "logits/rejected": -0.7648983001708984, + "logps/chosen": -666.0118408203125, + "logps/rejected": -1355.562255859375, + "loss": 0.0627, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.16047951579093933, + "rewards/margins": 0.33138307929039, + "rewards/rejected": -0.49186262488365173, + "step": 4410 + }, + { + "epoch": 0.84, + "learning_rate": 3.711182717893011e-07, + "logits/chosen": -1.6102008819580078, + "logits/rejected": -0.9493368864059448, + "logps/chosen": -687.1583862304688, + "logps/rejected": -1226.296630859375, + "loss": 0.1014, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.155551940202713, + "rewards/margins": 0.2699928879737854, + "rewards/rejected": -0.4255448281764984, + "step": 4420 + }, + { + "epoch": 0.84, + "learning_rate": 3.624508961975215e-07, + "logits/chosen": -1.6212804317474365, + "logits/rejected": -1.1436877250671387, + "logps/chosen": -575.4029541015625, + "logps/rejected": -1286.482177734375, + "loss": 0.0744, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1249982938170433, + "rewards/margins": 0.3159942030906677, + "rewards/rejected": -0.4409925043582916, + "step": 4430 + }, + { + "epoch": 0.85, + "learning_rate": 3.538780159953348e-07, + "logits/chosen": -1.238149642944336, + "logits/rejected": -0.9261630773544312, + "logps/chosen": -583.4049072265625, + "logps/rejected": -1233.3751220703125, + "loss": 0.0636, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.1378888636827469, + "rewards/margins": 0.2824627161026001, + "rewards/rejected": -0.4203515946865082, + "step": 4440 + }, + { + "epoch": 0.85, + "learning_rate": 3.454000101670901e-07, + "logits/chosen": -1.3475556373596191, + "logits/rejected": -0.9213441610336304, + "logps/chosen": -625.7487182617188, + "logps/rejected": -1175.872314453125, + "loss": 0.0997, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1552494466304779, + "rewards/margins": 0.24734191596508026, + "rewards/rejected": -0.40259137749671936, + "step": 4450 + }, + { + "epoch": 0.85, + "learning_rate": 3.3701725350299143e-07, + "logits/chosen": -1.5352661609649658, + "logits/rejected": -1.1658331155776978, + "logps/chosen": -521.9005126953125, + "logps/rejected": -1237.071044921875, + "loss": 0.0584, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.11491229385137558, + "rewards/margins": 0.31515389680862427, + "rewards/rejected": -0.43006619811058044, + "step": 4460 + }, + { + "epoch": 0.85, + "learning_rate": 3.2873011658252796e-07, + "logits/chosen": -1.5209187269210815, + "logits/rejected": -0.6413207054138184, + "logps/chosen": -619.4137573242188, + "logps/rejected": -1321.85107421875, + "loss": 0.0355, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.10239653289318085, + "rewards/margins": 0.3683980703353882, + "rewards/rejected": -0.47079458832740784, + "step": 4470 + }, + { + "epoch": 0.85, + "learning_rate": 3.2053896575809426e-07, + "logits/chosen": -1.7566978931427002, + "logits/rejected": -1.147778868675232, + "logps/chosen": -541.6817016601562, + "logps/rejected": -1217.3521728515625, + "loss": 0.06, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.10900872945785522, + "rewards/margins": 0.3140162527561188, + "rewards/rejected": -0.423024982213974, + "step": 4480 + }, + { + "epoch": 0.86, + "learning_rate": 3.124441631387931e-07, + "logits/chosen": -1.3777107000350952, + "logits/rejected": -0.8700397610664368, + "logps/chosen": -538.3272094726562, + "logps/rejected": -1210.5562744140625, + "loss": 0.089, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.09773439168930054, + "rewards/margins": 0.3020879328250885, + "rewards/rejected": -0.39982232451438904, + "step": 4490 + }, + { + "epoch": 0.86, + "learning_rate": 3.044460665744284e-07, + "logits/chosen": -1.3997788429260254, + "logits/rejected": -0.8937880396842957, + "logps/chosen": -535.1387939453125, + "logps/rejected": -1171.378662109375, + "loss": 0.073, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.1069500669836998, + "rewards/margins": 0.30229440331459045, + "rewards/rejected": -0.40924444794654846, + "step": 4500 + }, + { + "epoch": 0.86, + "learning_rate": 2.9654502963968575e-07, + "logits/chosen": -1.4595518112182617, + "logits/rejected": -0.9943321943283081, + "logps/chosen": -663.2821655273438, + "logps/rejected": -1067.5167236328125, + "loss": 0.1362, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.14275141060352325, + "rewards/margins": 0.21759268641471863, + "rewards/rejected": -0.3603441119194031, + "step": 4510 + }, + { + "epoch": 0.86, + "learning_rate": 2.8874140161849915e-07, + "logits/chosen": -1.4665237665176392, + "logits/rejected": -0.9472633600234985, + "logps/chosen": -534.7359008789062, + "logps/rejected": -1252.173583984375, + "loss": 0.0576, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10868203639984131, + "rewards/margins": 0.32610076665878296, + "rewards/rejected": -0.43478280305862427, + "step": 4520 + }, + { + "epoch": 0.86, + "learning_rate": 2.810355274886148e-07, + "logits/chosen": -1.4627294540405273, + "logits/rejected": -0.7866867780685425, + "logps/chosen": -580.9840087890625, + "logps/rejected": -1238.630859375, + "loss": 0.056, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.06367157399654388, + "rewards/margins": 0.33369964361190796, + "rewards/rejected": -0.39737120270729065, + "step": 4530 + }, + { + "epoch": 0.86, + "learning_rate": 2.7342774790633686e-07, + "logits/chosen": -1.4155508279800415, + "logits/rejected": -0.8982292413711548, + "logps/chosen": -569.6241455078125, + "logps/rejected": -1352.642333984375, + "loss": 0.0385, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.12062004953622818, + "rewards/margins": 0.3387775719165802, + "rewards/rejected": -0.4593976140022278, + "step": 4540 + }, + { + "epoch": 0.87, + "learning_rate": 2.6591839919146963e-07, + "logits/chosen": -1.459616780281067, + "logits/rejected": -1.2273374795913696, + "logps/chosen": -510.34149169921875, + "logps/rejected": -1148.1497802734375, + "loss": 0.0898, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.11455903947353363, + "rewards/margins": 0.27048274874687195, + "rewards/rejected": -0.38504183292388916, + "step": 4550 + }, + { + "epoch": 0.87, + "learning_rate": 2.58507813312448e-07, + "logits/chosen": -1.6770299673080444, + "logits/rejected": -1.0825966596603394, + "logps/chosen": -490.6595764160156, + "logps/rejected": -1214.4444580078125, + "loss": 0.0705, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1060783639550209, + "rewards/margins": 0.30906233191490173, + "rewards/rejected": -0.41514068841934204, + "step": 4560 + }, + { + "epoch": 0.87, + "learning_rate": 2.511963178716648e-07, + "logits/chosen": -1.7969233989715576, + "logits/rejected": -1.0599218606948853, + "logps/chosen": -517.402587890625, + "logps/rejected": -1123.163330078125, + "loss": 0.0669, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.09941872954368591, + "rewards/margins": 0.28655630350112915, + "rewards/rejected": -0.38597503304481506, + "step": 4570 + }, + { + "epoch": 0.87, + "learning_rate": 2.439842360909864e-07, + "logits/chosen": -1.678088903427124, + "logits/rejected": -0.8970896601676941, + "logps/chosen": -642.3028564453125, + "logps/rejected": -1249.11767578125, + "loss": 0.0721, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13762176036834717, + "rewards/margins": 0.3055310547351837, + "rewards/rejected": -0.4431528151035309, + "step": 4580 + }, + { + "epoch": 0.87, + "learning_rate": 2.3687188679746314e-07, + "logits/chosen": -1.7187156677246094, + "logits/rejected": -1.1094046831130981, + "logps/chosen": -660.9083862304688, + "logps/rejected": -1219.8033447265625, + "loss": 0.0702, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11426536738872528, + "rewards/margins": 0.27515870332717896, + "rewards/rejected": -0.38942405581474304, + "step": 4590 + }, + { + "epoch": 0.88, + "learning_rate": 2.2985958440923772e-07, + "logits/chosen": -1.6766948699951172, + "logits/rejected": -0.9987794756889343, + "logps/chosen": -513.1531372070312, + "logps/rejected": -1196.9598388671875, + "loss": 0.0741, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.07514746487140656, + "rewards/margins": 0.32911479473114014, + "rewards/rejected": -0.4042623043060303, + "step": 4600 + }, + { + "epoch": 0.88, + "learning_rate": 2.2294763892164284e-07, + "logits/chosen": -1.507949948310852, + "logits/rejected": -0.9113578796386719, + "logps/chosen": -518.02294921875, + "logps/rejected": -1056.095947265625, + "loss": 0.064, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.0894726812839508, + "rewards/margins": 0.2744660973548889, + "rewards/rejected": -0.3639387786388397, + "step": 4610 + }, + { + "epoch": 0.88, + "learning_rate": 2.1613635589349756e-07, + "logits/chosen": -1.50367271900177, + "logits/rejected": -1.0042375326156616, + "logps/chosen": -592.36865234375, + "logps/rejected": -1257.645751953125, + "loss": 0.0786, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.11847710609436035, + "rewards/margins": 0.3014236092567444, + "rewards/rejected": -0.41990071535110474, + "step": 4620 + }, + { + "epoch": 0.88, + "learning_rate": 2.094260364336026e-07, + "logits/chosen": -1.5850324630737305, + "logits/rejected": -1.1900346279144287, + "logps/chosen": -493.8600158691406, + "logps/rejected": -1127.6407470703125, + "loss": 0.077, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.11056572198867798, + "rewards/margins": 0.2895745635032654, + "rewards/rejected": -0.40014028549194336, + "step": 4630 + }, + { + "epoch": 0.88, + "learning_rate": 2.0281697718742333e-07, + "logits/chosen": -1.6359741687774658, + "logits/rejected": -0.8543532490730286, + "logps/chosen": -719.7067260742188, + "logps/rejected": -1214.4210205078125, + "loss": 0.0717, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.11109743267297745, + "rewards/margins": 0.2912042737007141, + "rewards/rejected": -0.40230169892311096, + "step": 4640 + }, + { + "epoch": 0.89, + "learning_rate": 1.9630947032398068e-07, + "logits/chosen": -1.665879487991333, + "logits/rejected": -0.9275639653205872, + "logps/chosen": -542.2432861328125, + "logps/rejected": -1198.4542236328125, + "loss": 0.0642, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.11047229915857315, + "rewards/margins": 0.3198242485523224, + "rewards/rejected": -0.43029651045799255, + "step": 4650 + }, + { + "epoch": 0.89, + "learning_rate": 1.899038035229342e-07, + "logits/chosen": -1.3144588470458984, + "logits/rejected": -0.8643208742141724, + "logps/chosen": -531.2198486328125, + "logps/rejected": -1097.012451171875, + "loss": 0.0797, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1076936349272728, + "rewards/margins": 0.27217811346054077, + "rewards/rejected": -0.37987175583839417, + "step": 4660 + }, + { + "epoch": 0.89, + "learning_rate": 1.8360025996186138e-07, + "logits/chosen": -1.4649903774261475, + "logits/rejected": -0.9471826553344727, + "logps/chosen": -557.9927978515625, + "logps/rejected": -1155.4908447265625, + "loss": 0.0715, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.10311124473810196, + "rewards/margins": 0.27083897590637207, + "rewards/rejected": -0.3739502727985382, + "step": 4670 + }, + { + "epoch": 0.89, + "learning_rate": 1.7739911830374352e-07, + "logits/chosen": -1.4640676975250244, + "logits/rejected": -0.7999382019042969, + "logps/chosen": -548.3810424804688, + "logps/rejected": -1133.371337890625, + "loss": 0.0762, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.11513851583003998, + "rewards/margins": 0.25577312707901, + "rewards/rejected": -0.3709116578102112, + "step": 4680 + }, + { + "epoch": 0.89, + "learning_rate": 1.713006526846439e-07, + "logits/chosen": -1.418255090713501, + "logits/rejected": -0.7617162466049194, + "logps/chosen": -526.6948852539062, + "logps/rejected": -1178.375244140625, + "loss": 0.0627, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10353302955627441, + "rewards/margins": 0.3077407777309418, + "rewards/rejected": -0.4112738072872162, + "step": 4690 + }, + { + "epoch": 0.9, + "learning_rate": 1.6530513270159116e-07, + "logits/chosen": -1.6661808490753174, + "logits/rejected": -1.0216115713119507, + "logps/chosen": -575.176513671875, + "logps/rejected": -1170.5579833984375, + "loss": 0.0789, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12035945802927017, + "rewards/margins": 0.28512948751449585, + "rewards/rejected": -0.40548890829086304, + "step": 4700 + }, + { + "epoch": 0.9, + "learning_rate": 1.59412823400657e-07, + "logits/chosen": -1.348259687423706, + "logits/rejected": -0.726833701133728, + "logps/chosen": -512.1627197265625, + "logps/rejected": -1199.069091796875, + "loss": 0.0751, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.10438477993011475, + "rewards/margins": 0.2982539236545563, + "rewards/rejected": -0.4026387333869934, + "step": 4710 + }, + { + "epoch": 0.9, + "learning_rate": 1.5362398526524463e-07, + "logits/chosen": -1.6081886291503906, + "logits/rejected": -1.0844703912734985, + "logps/chosen": -531.6392822265625, + "logps/rejected": -1220.3037109375, + "loss": 0.0601, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09040726721286774, + "rewards/margins": 0.32025203108787537, + "rewards/rejected": -0.4106592535972595, + "step": 4720 + }, + { + "epoch": 0.9, + "learning_rate": 1.4793887420457008e-07, + "logits/chosen": -1.6161870956420898, + "logits/rejected": -1.0405725240707397, + "logps/chosen": -516.3242797851562, + "logps/rejected": -1223.478759765625, + "loss": 0.0697, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.09660589694976807, + "rewards/margins": 0.3125608563423157, + "rewards/rejected": -0.40916675329208374, + "step": 4730 + }, + { + "epoch": 0.9, + "learning_rate": 1.4235774154234855e-07, + "logits/chosen": -1.5217710733413696, + "logits/rejected": -0.954803466796875, + "logps/chosen": -702.4046630859375, + "logps/rejected": -1238.7601318359375, + "loss": 0.0664, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14844849705696106, + "rewards/margins": 0.2766263484954834, + "rewards/rejected": -0.42507481575012207, + "step": 4740 + }, + { + "epoch": 0.9, + "learning_rate": 1.368808340056879e-07, + "logits/chosen": -1.5613772869110107, + "logits/rejected": -0.8760370016098022, + "logps/chosen": -583.1823120117188, + "logps/rejected": -1393.697998046875, + "loss": 0.0325, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.1385408341884613, + "rewards/margins": 0.35670894384384155, + "rewards/rejected": -0.49524980783462524, + "step": 4750 + }, + { + "epoch": 0.91, + "learning_rate": 1.31508393714177e-07, + "logits/chosen": -1.7405493259429932, + "logits/rejected": -1.084986686706543, + "logps/chosen": -643.45849609375, + "logps/rejected": -1322.990478515625, + "loss": 0.0696, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12244760990142822, + "rewards/margins": 0.32172149419784546, + "rewards/rejected": -0.44416913390159607, + "step": 4760 + }, + { + "epoch": 0.91, + "learning_rate": 1.2624065816918414e-07, + "logits/chosen": -1.6162086725234985, + "logits/rejected": -1.1142741441726685, + "logps/chosen": -675.887451171875, + "logps/rejected": -1231.4742431640625, + "loss": 0.1102, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.13799907267093658, + "rewards/margins": 0.2717846930027008, + "rewards/rejected": -0.4097837507724762, + "step": 4770 + }, + { + "epoch": 0.91, + "learning_rate": 1.210778602433596e-07, + "logits/chosen": -1.5767656564712524, + "logits/rejected": -0.9563184976577759, + "logps/chosen": -629.3272705078125, + "logps/rejected": -1314.0963134765625, + "loss": 0.0657, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12461809068918228, + "rewards/margins": 0.34095823764801025, + "rewards/rejected": -0.46557626128196716, + "step": 4780 + }, + { + "epoch": 0.91, + "learning_rate": 1.1602022817033709e-07, + "logits/chosen": -1.5052716732025146, + "logits/rejected": -1.0084514617919922, + "logps/chosen": -554.8811645507812, + "logps/rejected": -1199.1683349609375, + "loss": 0.0681, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11769001185894012, + "rewards/margins": 0.27790355682373047, + "rewards/rejected": -0.3955935835838318, + "step": 4790 + }, + { + "epoch": 0.91, + "learning_rate": 1.1106798553464804e-07, + "logits/chosen": -1.4443817138671875, + "logits/rejected": -0.8818323016166687, + "logps/chosen": -675.2034301757812, + "logps/rejected": -1283.4957275390625, + "loss": 0.0803, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.1482025682926178, + "rewards/margins": 0.3057909905910492, + "rewards/rejected": -0.4539934992790222, + "step": 4800 + }, + { + "epoch": 0.92, + "learning_rate": 1.0622135126183514e-07, + "logits/chosen": -1.4301539659500122, + "logits/rejected": -0.8788467645645142, + "logps/chosen": -489.4832458496094, + "logps/rejected": -1205.6776123046875, + "loss": 0.0697, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10816861689090729, + "rewards/margins": 0.31940537691116333, + "rewards/rejected": -0.4275740087032318, + "step": 4810 + }, + { + "epoch": 0.92, + "learning_rate": 1.0148053960877396e-07, + "logits/chosen": -1.4718639850616455, + "logits/rejected": -1.015871286392212, + "logps/chosen": -622.8069458007812, + "logps/rejected": -1381.413818359375, + "loss": 0.0532, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.14863041043281555, + "rewards/margins": 0.34109631180763245, + "rewards/rejected": -0.4897266924381256, + "step": 4820 + }, + { + "epoch": 0.92, + "learning_rate": 9.684576015420277e-08, + "logits/chosen": -1.6295974254608154, + "logits/rejected": -0.9986993074417114, + "logps/chosen": -497.78411865234375, + "logps/rejected": -1245.800048828125, + "loss": 0.0478, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11491765081882477, + "rewards/margins": 0.3258935213088989, + "rewards/rejected": -0.4408111572265625, + "step": 4830 + }, + { + "epoch": 0.92, + "learning_rate": 9.23172177894574e-08, + "logits/chosen": -1.498579502105713, + "logits/rejected": -0.9404312372207642, + "logps/chosen": -580.7014770507812, + "logps/rejected": -1169.010498046875, + "loss": 0.0828, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.146192267537117, + "rewards/margins": 0.2631181478500366, + "rewards/rejected": -0.40931040048599243, + "step": 4840 + }, + { + "epoch": 0.92, + "learning_rate": 8.78951127094127e-08, + "logits/chosen": -1.4492034912109375, + "logits/rejected": -0.902155876159668, + "logps/chosen": -580.7665405273438, + "logps/rejected": -1362.78125, + "loss": 0.0515, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.1426132470369339, + "rewards/margins": 0.3427208364009857, + "rewards/rejected": -0.4853340983390808, + "step": 4850 + }, + { + "epoch": 0.93, + "learning_rate": 8.357964040363209e-08, + "logits/chosen": -1.699753999710083, + "logits/rejected": -0.9447946548461914, + "logps/chosen": -643.726318359375, + "logps/rejected": -1170.8514404296875, + "loss": 0.1054, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.14456409215927124, + "rewards/margins": 0.26414671540260315, + "rewards/rejected": -0.408710777759552, + "step": 4860 + }, + { + "epoch": 0.93, + "learning_rate": 7.937099164772699e-08, + "logits/chosen": -1.4378997087478638, + "logits/rejected": -1.1139097213745117, + "logps/chosen": -568.4095458984375, + "logps/rejected": -1186.299072265625, + "loss": 0.0868, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.13183560967445374, + "rewards/margins": 0.2873497009277344, + "rewards/rejected": -0.4191853404045105, + "step": 4870 + }, + { + "epoch": 0.93, + "learning_rate": 7.526935249492245e-08, + "logits/chosen": -1.39249587059021, + "logits/rejected": -0.9399245381355286, + "logps/chosen": -592.54541015625, + "logps/rejected": -1305.0413818359375, + "loss": 0.0652, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13843494653701782, + "rewards/margins": 0.3276015818119049, + "rewards/rejected": -0.4660365581512451, + "step": 4880 + }, + { + "epoch": 0.93, + "learning_rate": 7.127490426783124e-08, + "logits/chosen": -1.7015079259872437, + "logits/rejected": -0.9024487733840942, + "logps/chosen": -660.8875122070312, + "logps/rejected": -1300.248291015625, + "loss": 0.0513, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.1391044408082962, + "rewards/margins": 0.32587337493896484, + "rewards/rejected": -0.46497783064842224, + "step": 4890 + }, + { + "epoch": 0.93, + "learning_rate": 6.738782355044048e-08, + "logits/chosen": -1.6943657398223877, + "logits/rejected": -0.9999350309371948, + "logps/chosen": -548.2307739257812, + "logps/rejected": -1201.4068603515625, + "loss": 0.0649, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.09915059804916382, + "rewards/margins": 0.3060828745365143, + "rewards/rejected": -0.4052334725856781, + "step": 4900 + }, + { + "epoch": 0.94, + "learning_rate": 6.360828218030191e-08, + "logits/chosen": -1.4893453121185303, + "logits/rejected": -1.2656629085540771, + "logps/chosen": -637.3431396484375, + "logps/rejected": -1251.2078857421875, + "loss": 0.0828, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1782352179288864, + "rewards/margins": 0.25589674711227417, + "rewards/rejected": -0.4341319501399994, + "step": 4910 + }, + { + "epoch": 0.94, + "learning_rate": 5.993644724093889e-08, + "logits/chosen": -1.530643343925476, + "logits/rejected": -1.0829918384552002, + "logps/chosen": -568.8175048828125, + "logps/rejected": -1262.355224609375, + "loss": 0.0742, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12133932113647461, + "rewards/margins": 0.3102935254573822, + "rewards/rejected": -0.4316328465938568, + "step": 4920 + }, + { + "epoch": 0.94, + "learning_rate": 5.637248105445775e-08, + "logits/chosen": -1.4310568571090698, + "logits/rejected": -0.8825603723526001, + "logps/chosen": -502.86248779296875, + "logps/rejected": -1256.2763671875, + "loss": 0.0616, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.1074456200003624, + "rewards/margins": 0.3345298767089844, + "rewards/rejected": -0.44197553396224976, + "step": 4930 + }, + { + "epoch": 0.94, + "learning_rate": 5.291654117437262e-08, + "logits/chosen": -1.4048588275909424, + "logits/rejected": -0.9273044466972351, + "logps/chosen": -544.2005004882812, + "logps/rejected": -1113.27490234375, + "loss": 0.0786, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12635964155197144, + "rewards/margins": 0.265400230884552, + "rewards/rejected": -0.39175987243652344, + "step": 4940 + }, + { + "epoch": 0.94, + "learning_rate": 4.956878037864044e-08, + "logits/chosen": -1.393763780593872, + "logits/rejected": -0.8130865097045898, + "logps/chosen": -647.5671997070312, + "logps/rejected": -1257.166015625, + "loss": 0.0689, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.15376783907413483, + "rewards/margins": 0.29182344675064087, + "rewards/rejected": -0.4455912709236145, + "step": 4950 + }, + { + "epoch": 0.94, + "learning_rate": 4.632934666290778e-08, + "logits/chosen": -1.3255221843719482, + "logits/rejected": -0.9536614418029785, + "logps/chosen": -657.3424682617188, + "logps/rejected": -1044.6119384765625, + "loss": 0.099, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.15257620811462402, + "rewards/margins": 0.20966574549674988, + "rewards/rejected": -0.3622419536113739, + "step": 4960 + }, + { + "epoch": 0.95, + "learning_rate": 4.319838323396691e-08, + "logits/chosen": -1.6361057758331299, + "logits/rejected": -0.7931776642799377, + "logps/chosen": -626.0015869140625, + "logps/rejected": -1315.7972412109375, + "loss": 0.0431, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.13260582089424133, + "rewards/margins": 0.3334971070289612, + "rewards/rejected": -0.4661029279232025, + "step": 4970 + }, + { + "epoch": 0.95, + "learning_rate": 4.017602850342584e-08, + "logits/chosen": -1.3285863399505615, + "logits/rejected": -0.7825818061828613, + "logps/chosen": -550.2575073242188, + "logps/rejected": -1211.78125, + "loss": 0.0628, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10753561556339264, + "rewards/margins": 0.3216678500175476, + "rewards/rejected": -0.42920345067977905, + "step": 4980 + }, + { + "epoch": 0.95, + "learning_rate": 3.7262416081589866e-08, + "logits/chosen": -1.5621405839920044, + "logits/rejected": -0.8195871114730835, + "logps/chosen": -659.1685791015625, + "logps/rejected": -1270.6734619140625, + "loss": 0.0554, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12191258370876312, + "rewards/margins": 0.34549933671951294, + "rewards/rejected": -0.46741190552711487, + "step": 4990 + }, + { + "epoch": 0.95, + "learning_rate": 3.445767477155443e-08, + "logits/chosen": -1.5442430973052979, + "logits/rejected": -1.2246438264846802, + "logps/chosen": -547.2069091796875, + "logps/rejected": -1145.769287109375, + "loss": 0.0771, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.13483698666095734, + "rewards/margins": 0.2825453281402588, + "rewards/rejected": -0.41738229990005493, + "step": 5000 + }, + { + "epoch": 0.95, + "learning_rate": 3.1761928563510956e-08, + "logits/chosen": -1.7152458429336548, + "logits/rejected": -0.9394499063491821, + "logps/chosen": -649.7903442382812, + "logps/rejected": -1298.006591796875, + "loss": 0.0552, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.1411394327878952, + "rewards/margins": 0.32097527384757996, + "rewards/rejected": -0.46211472153663635, + "step": 5010 + }, + { + "epoch": 0.96, + "learning_rate": 2.917529662926549e-08, + "logits/chosen": -1.520808219909668, + "logits/rejected": -0.945067286491394, + "logps/chosen": -517.6978759765625, + "logps/rejected": -1175.654541015625, + "loss": 0.0584, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1118912473320961, + "rewards/margins": 0.30338698625564575, + "rewards/rejected": -0.41527828574180603, + "step": 5020 + }, + { + "epoch": 0.96, + "learning_rate": 2.669789331697148e-08, + "logits/chosen": -1.5593969821929932, + "logits/rejected": -1.092525839805603, + "logps/chosen": -691.4847412109375, + "logps/rejected": -1298.0960693359375, + "loss": 0.0926, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.16525861620903015, + "rewards/margins": 0.2925161123275757, + "rewards/rejected": -0.45777472853660583, + "step": 5030 + }, + { + "epoch": 0.96, + "learning_rate": 2.4329828146074096e-08, + "logits/chosen": -1.4827836751937866, + "logits/rejected": -1.064502477645874, + "logps/chosen": -419.036376953125, + "logps/rejected": -958.3380737304688, + "loss": 0.1329, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10691438615322113, + "rewards/margins": 0.2166411131620407, + "rewards/rejected": -0.32355549931526184, + "step": 5040 + }, + { + "epoch": 0.96, + "learning_rate": 2.20712058024683e-08, + "logits/chosen": -1.5136808156967163, + "logits/rejected": -0.7604056000709534, + "logps/chosen": -545.261474609375, + "logps/rejected": -1140.312255859375, + "loss": 0.0594, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1134977787733078, + "rewards/margins": 0.29956376552581787, + "rewards/rejected": -0.41306155920028687, + "step": 5050 + }, + { + "epoch": 0.96, + "learning_rate": 1.9922126133870568e-08, + "logits/chosen": -1.685162901878357, + "logits/rejected": -0.8499481081962585, + "logps/chosen": -608.2676391601562, + "logps/rejected": -1121.2872314453125, + "loss": 0.0749, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.11034168303012848, + "rewards/margins": 0.2738109230995178, + "rewards/rejected": -0.3841525912284851, + "step": 5060 + }, + { + "epoch": 0.97, + "learning_rate": 1.7882684145406616e-08, + "logits/chosen": -1.9093729257583618, + "logits/rejected": -1.2209051847457886, + "logps/chosen": -613.359375, + "logps/rejected": -1183.755615234375, + "loss": 0.0905, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.1341812163591385, + "rewards/margins": 0.27863994240760803, + "rewards/rejected": -0.4128211438655853, + "step": 5070 + }, + { + "epoch": 0.97, + "learning_rate": 1.595296999541057e-08, + "logits/chosen": -1.5866085290908813, + "logits/rejected": -1.153424620628357, + "logps/chosen": -531.4694213867188, + "logps/rejected": -1303.758056640625, + "loss": 0.0664, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12967099249362946, + "rewards/margins": 0.3269729018211365, + "rewards/rejected": -0.45664387941360474, + "step": 5080 + }, + { + "epoch": 0.97, + "learning_rate": 1.4133068991437903e-08, + "logits/chosen": -1.5420364141464233, + "logits/rejected": -0.7891443371772766, + "logps/chosen": -577.220947265625, + "logps/rejected": -1330.06640625, + "loss": 0.0463, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1222272664308548, + "rewards/margins": 0.3585565686225891, + "rewards/rejected": -0.4807838499546051, + "step": 5090 + }, + { + "epoch": 0.97, + "learning_rate": 1.2423061586496476e-08, + "logits/chosen": -1.3690813779830933, + "logits/rejected": -0.7128039598464966, + "logps/chosen": -578.891357421875, + "logps/rejected": -1208.0849609375, + "loss": 0.0761, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.13187679648399353, + "rewards/margins": 0.2918475270271301, + "rewards/rejected": -0.42372435331344604, + "step": 5100 + }, + { + "epoch": 0.97, + "learning_rate": 1.0823023375489128e-08, + "logits/chosen": -1.5114414691925049, + "logits/rejected": -0.9036266207695007, + "logps/chosen": -595.9454345703125, + "logps/rejected": -1167.0740966796875, + "loss": 0.0934, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14127907156944275, + "rewards/margins": 0.25682133436203003, + "rewards/rejected": -0.39810046553611755, + "step": 5110 + }, + { + "epoch": 0.98, + "learning_rate": 9.333025091870507e-09, + "logits/chosen": -1.3770151138305664, + "logits/rejected": -1.0183017253875732, + "logps/chosen": -571.3338623046875, + "logps/rejected": -1342.66455078125, + "loss": 0.0561, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.12807521224021912, + "rewards/margins": 0.32246822118759155, + "rewards/rejected": -0.45054346323013306, + "step": 5120 + }, + { + "epoch": 0.98, + "learning_rate": 7.95313260452263e-09, + "logits/chosen": -1.7575418949127197, + "logits/rejected": -1.016129970550537, + "logps/chosen": -574.4557495117188, + "logps/rejected": -1338.8140869140625, + "loss": 0.0543, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1301496922969818, + "rewards/margins": 0.3553154766559601, + "rewards/rejected": -0.4854651093482971, + "step": 5130 + }, + { + "epoch": 0.98, + "learning_rate": 6.683406914840818e-09, + "logits/chosen": -1.581937313079834, + "logits/rejected": -0.8572310209274292, + "logps/chosen": -605.4105834960938, + "logps/rejected": -1300.188720703125, + "loss": 0.0532, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.1113300696015358, + "rewards/margins": 0.3524828255176544, + "rewards/rejected": -0.4638128876686096, + "step": 5140 + }, + { + "epoch": 0.98, + "learning_rate": 5.523904154037529e-09, + "logits/chosen": -1.8323339223861694, + "logits/rejected": -1.1476819515228271, + "logps/chosen": -612.7574462890625, + "logps/rejected": -1313.646728515625, + "loss": 0.0479, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.11692248284816742, + "rewards/margins": 0.3401055634021759, + "rewards/rejected": -0.45702800154685974, + "step": 5150 + }, + { + "epoch": 0.98, + "learning_rate": 4.474675580662113e-09, + "logits/chosen": -1.613720178604126, + "logits/rejected": -0.799970269203186, + "logps/chosen": -609.1049194335938, + "logps/rejected": -1300.5469970703125, + "loss": 0.0586, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12533780932426453, + "rewards/margins": 0.3247934877872467, + "rewards/rejected": -0.4501313269138336, + "step": 5160 + }, + { + "epoch": 0.98, + "learning_rate": 3.5357675783331823e-09, + "logits/chosen": -1.4680449962615967, + "logits/rejected": -0.8810006976127625, + "logps/chosen": -550.5829467773438, + "logps/rejected": -1324.5640869140625, + "loss": 0.0451, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09199674427509308, + "rewards/margins": 0.3480994701385498, + "rewards/rejected": -0.4400961995124817, + "step": 5170 + }, + { + "epoch": 0.99, + "learning_rate": 2.7072216536885855e-09, + "logits/chosen": -1.4214956760406494, + "logits/rejected": -1.0915638208389282, + "logps/chosen": -592.4363403320312, + "logps/rejected": -1323.13720703125, + "loss": 0.0455, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11093990504741669, + "rewards/margins": 0.3408183157444, + "rewards/rejected": -0.4517582058906555, + "step": 5180 + }, + { + "epoch": 0.99, + "learning_rate": 1.989074434551874e-09, + "logits/chosen": -1.5371973514556885, + "logits/rejected": -0.8911060094833374, + "logps/chosen": -626.3933715820312, + "logps/rejected": -1309.9378662109375, + "loss": 0.0631, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.13457807898521423, + "rewards/margins": 0.3284324109554291, + "rewards/rejected": -0.4630104601383209, + "step": 5190 + }, + { + "epoch": 0.99, + "learning_rate": 1.3813576683111007e-09, + "logits/chosen": -1.585335373878479, + "logits/rejected": -0.9166946411132812, + "logps/chosen": -645.6207885742188, + "logps/rejected": -1415.727783203125, + "loss": 0.0712, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.13262395560741425, + "rewards/margins": 0.3447038233280182, + "rewards/rejected": -0.47732776403427124, + "step": 5200 + }, + { + "epoch": 0.99, + "learning_rate": 8.840982205160498e-10, + "logits/chosen": -1.6240886449813843, + "logits/rejected": -0.658467710018158, + "logps/chosen": -574.8680419921875, + "logps/rejected": -1388.4107666015625, + "loss": 0.0295, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.09021851420402527, + "rewards/margins": 0.39147868752479553, + "rewards/rejected": -0.4816971719264984, + "step": 5210 + }, + { + "epoch": 0.99, + "learning_rate": 4.973180736911332e-10, + "logits/chosen": -1.6752641201019287, + "logits/rejected": -1.2019436359405518, + "logps/chosen": -498.8638610839844, + "logps/rejected": -1144.5714111328125, + "loss": 0.079, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1074768528342247, + "rewards/margins": 0.2879261374473572, + "rewards/rejected": -0.3954029977321625, + "step": 5220 + }, + { + "epoch": 1.0, + "learning_rate": 2.2103432636366718e-10, + "logits/chosen": -1.4439948797225952, + "logits/rejected": -1.0657399892807007, + "logps/chosen": -604.0296630859375, + "logps/rejected": -1356.142578125, + "loss": 0.0609, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.12612642347812653, + "rewards/margins": 0.3389315605163574, + "rewards/rejected": -0.46505799889564514, + "step": 5230 + }, + { + "epoch": 1.0, + "learning_rate": 5.525919230670029e-11, + "logits/chosen": -1.5597736835479736, + "logits/rejected": -1.0183484554290771, + "logps/chosen": -586.085205078125, + "logps/rejected": -1289.2353515625, + "loss": 0.0449, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.11757595837116241, + "rewards/margins": 0.3439788818359375, + "rewards/rejected": -0.4615548551082611, + "step": 5240 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "logits/chosen": -1.2492470741271973, + "logits/rejected": -0.795202374458313, + "logps/chosen": -545.5097045898438, + "logps/rejected": -1238.634033203125, + "loss": 0.0598, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.12360592931509018, + "rewards/margins": 0.28936389088630676, + "rewards/rejected": -0.41296982765197754, + "step": 5250 + }, + { + "epoch": 1.0, + "step": 5250, + "total_flos": 0.0, + "train_loss": 0.07767095326809656, + "train_runtime": 21725.9611, + "train_samples_per_second": 0.967, + "train_steps_per_second": 0.242 + } + ], + "logging_steps": 10, + "max_steps": 5250, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 100, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}