{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 5250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.523809523809524e-09, "logits/chosen": -1.4941036701202393, "logits/rejected": -1.392427682876587, "logps/chosen": -67.08292388916016, "logps/rejected": -289.04925537109375, "loss": 0.1606, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 9.523809523809525e-08, "logits/chosen": -1.6196446418762207, "logits/rejected": -1.2225258350372314, "logps/chosen": -404.9694519042969, "logps/rejected": -765.021240234375, "loss": 0.2208, "rewards/accuracies": 0.4166666567325592, "rewards/chosen": 0.00017578975530341268, "rewards/margins": -5.83395967623801e-06, "rewards/rejected": 0.00018162367632612586, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.904761904761905e-07, "logits/chosen": -1.7201000452041626, "logits/rejected": -1.0938055515289307, "logps/chosen": -470.4878845214844, "logps/rejected": -853.2833251953125, "loss": 0.2049, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0003256895288359374, "rewards/margins": 0.00018404466391075402, "rewards/rejected": 0.0001416448940290138, "step": 20 }, { "epoch": 0.01, "learning_rate": 2.8571428571428575e-07, "logits/chosen": -1.7836272716522217, "logits/rejected": -1.3143703937530518, "logps/chosen": -437.3982849121094, "logps/rejected": -757.3729248046875, "loss": 0.1837, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0006006127223372459, "rewards/margins": 0.0018018081318587065, "rewards/rejected": -0.0012011956423521042, "step": 30 }, { "epoch": 0.01, "learning_rate": 3.80952380952381e-07, "logits/chosen": -1.3586902618408203, "logits/rejected": -1.1272189617156982, "logps/chosen": -391.42633056640625, "logps/rejected": -752.5025634765625, "loss": 0.2255, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.0005751135759055614, "rewards/margins": 0.00423981249332428, "rewards/rejected": -0.003664699150249362, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.7619047619047623e-07, "logits/chosen": -1.6611652374267578, "logits/rejected": -1.1007435321807861, "logps/chosen": -380.03289794921875, "logps/rejected": -720.9581298828125, "loss": 0.1682, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.00211103493347764, "rewards/margins": 0.009362945333123207, "rewards/rejected": -0.007251910865306854, "step": 50 }, { "epoch": 0.01, "learning_rate": 5.714285714285715e-07, "logits/chosen": -1.514490008354187, "logits/rejected": -1.14967942237854, "logps/chosen": -574.3751220703125, "logps/rejected": -911.6978759765625, "loss": 0.196, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.0030188350938260555, "rewards/margins": 0.015978649258613586, "rewards/rejected": -0.012959812767803669, "step": 60 }, { "epoch": 0.01, "learning_rate": 6.666666666666667e-07, "logits/chosen": -1.6599044799804688, "logits/rejected": -0.966509222984314, "logps/chosen": -598.3924560546875, "logps/rejected": -950.7366333007812, "loss": 0.1913, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.005272147245705128, "rewards/margins": 0.03154373914003372, "rewards/rejected": -0.02627158723771572, "step": 70 }, { "epoch": 0.02, "learning_rate": 7.61904761904762e-07, "logits/chosen": -1.6465399265289307, "logits/rejected": -1.0205047130584717, "logps/chosen": -457.08416748046875, "logps/rejected": -827.5206909179688, "loss": 0.1416, "rewards/accuracies": 0.875, "rewards/chosen": 0.018026497215032578, "rewards/margins": 0.047980599105358124, "rewards/rejected": -0.029954101890325546, "step": 80 }, { "epoch": 0.02, "learning_rate": 8.571428571428572e-07, "logits/chosen": -1.8371727466583252, "logits/rejected": -1.0108280181884766, "logps/chosen": -530.680908203125, "logps/rejected": -870.0900268554688, "loss": 0.1554, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.024562101811170578, "rewards/margins": 0.06634469330310822, "rewards/rejected": -0.041782595217227936, "step": 90 }, { "epoch": 0.02, "learning_rate": 9.523809523809525e-07, "logits/chosen": -1.7072471380233765, "logits/rejected": -1.1548950672149658, "logps/chosen": -446.76953125, "logps/rejected": -766.6468505859375, "loss": 0.1848, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0029524858109652996, "rewards/margins": 0.0440407320857048, "rewards/rejected": -0.04108824580907822, "step": 100 }, { "epoch": 0.02, "learning_rate": 1.0476190476190478e-06, "logits/chosen": -1.5558496713638306, "logits/rejected": -1.1346105337142944, "logps/chosen": -404.34613037109375, "logps/rejected": -873.2721557617188, "loss": 0.0977, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.014201948419213295, "rewards/margins": 0.09548144042491913, "rewards/rejected": -0.08127949386835098, "step": 110 }, { "epoch": 0.02, "learning_rate": 1.142857142857143e-06, "logits/chosen": -1.51814866065979, "logits/rejected": -0.92498379945755, "logps/chosen": -569.2479248046875, "logps/rejected": -906.2493896484375, "loss": 0.1117, "rewards/accuracies": 0.75, "rewards/chosen": -0.026814710348844528, "rewards/margins": 0.08289747685194016, "rewards/rejected": -0.10971218347549438, "step": 120 }, { "epoch": 0.02, "learning_rate": 1.2380952380952382e-06, "logits/chosen": -1.7106437683105469, "logits/rejected": -0.9695409536361694, "logps/chosen": -596.420166015625, "logps/rejected": -1031.6600341796875, "loss": 0.1178, "rewards/accuracies": 0.875, "rewards/chosen": -0.031284015625715256, "rewards/margins": 0.12073127180337906, "rewards/rejected": -0.15201528370380402, "step": 130 }, { "epoch": 0.03, "learning_rate": 1.3333333333333334e-06, "logits/chosen": -1.6556059122085571, "logits/rejected": -1.0907657146453857, "logps/chosen": -574.3132934570312, "logps/rejected": -996.0400390625, "loss": 0.1512, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.05514935776591301, "rewards/margins": 0.11033248901367188, "rewards/rejected": -0.16548185050487518, "step": 140 }, { "epoch": 0.03, "learning_rate": 1.4285714285714286e-06, "logits/chosen": -1.5143836736679077, "logits/rejected": -0.996626079082489, "logps/chosen": -603.2044677734375, "logps/rejected": -1043.1396484375, "loss": 0.1036, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08137336373329163, "rewards/margins": 0.13189618289470673, "rewards/rejected": -0.21326954662799835, "step": 150 }, { "epoch": 0.03, "learning_rate": 1.523809523809524e-06, "logits/chosen": -1.6874631643295288, "logits/rejected": -1.0239840745925903, "logps/chosen": -494.5599670410156, "logps/rejected": -1013.8928833007812, "loss": 0.1076, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0622636079788208, "rewards/margins": 0.19322165846824646, "rewards/rejected": -0.25548526644706726, "step": 160 }, { "epoch": 0.03, "learning_rate": 1.6190476190476193e-06, "logits/chosen": -1.5800025463104248, "logits/rejected": -1.1513192653656006, "logps/chosen": -510.38214111328125, "logps/rejected": -1081.0006103515625, "loss": 0.1125, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07993271201848984, "rewards/margins": 0.1820063441991806, "rewards/rejected": -0.26193904876708984, "step": 170 }, { "epoch": 0.03, "learning_rate": 1.7142857142857145e-06, "logits/chosen": -1.6302579641342163, "logits/rejected": -1.0718019008636475, "logps/chosen": -573.6951904296875, "logps/rejected": -1089.02978515625, "loss": 0.1109, "rewards/accuracies": 0.75, "rewards/chosen": -0.12081176042556763, "rewards/margins": 0.19152003526687622, "rewards/rejected": -0.31233179569244385, "step": 180 }, { "epoch": 0.04, "learning_rate": 1.8095238095238097e-06, "logits/chosen": -1.6149966716766357, "logits/rejected": -1.0598729848861694, "logps/chosen": -598.2444458007812, "logps/rejected": -1140.306884765625, "loss": 0.0935, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.13946162164211273, "rewards/margins": 0.2145201712846756, "rewards/rejected": -0.35398179292678833, "step": 190 }, { "epoch": 0.04, "learning_rate": 1.904761904761905e-06, "logits/chosen": -1.8164126873016357, "logits/rejected": -1.0813723802566528, "logps/chosen": -629.240478515625, "logps/rejected": -1242.144775390625, "loss": 0.0676, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17410385608673096, "rewards/margins": 0.25729167461395264, "rewards/rejected": -0.4313955307006836, "step": 200 }, { "epoch": 0.04, "learning_rate": 2.0000000000000003e-06, "logits/chosen": -1.7437931299209595, "logits/rejected": -1.0192813873291016, "logps/chosen": -559.017822265625, "logps/rejected": -1077.651611328125, "loss": 0.0833, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06384438276290894, "rewards/margins": 0.24092309176921844, "rewards/rejected": -0.3047674596309662, "step": 210 }, { "epoch": 0.04, "learning_rate": 2.0952380952380955e-06, "logits/chosen": -1.4945495128631592, "logits/rejected": -0.9317284822463989, "logps/chosen": -613.0845947265625, "logps/rejected": -1165.627197265625, "loss": 0.0806, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.0914846882224083, "rewards/margins": 0.25627589225769043, "rewards/rejected": -0.3477606177330017, "step": 220 }, { "epoch": 0.04, "learning_rate": 2.1904761904761908e-06, "logits/chosen": -1.7308365106582642, "logits/rejected": -1.0819891691207886, "logps/chosen": -565.0457763671875, "logps/rejected": -1136.459716796875, "loss": 0.11, "rewards/accuracies": 0.75, "rewards/chosen": -0.14601822197437286, "rewards/margins": 0.2253512144088745, "rewards/rejected": -0.3713694214820862, "step": 230 }, { "epoch": 0.05, "learning_rate": 2.285714285714286e-06, "logits/chosen": -1.565441370010376, "logits/rejected": -0.9686568975448608, "logps/chosen": -580.4759521484375, "logps/rejected": -1177.9888916015625, "loss": 0.1125, "rewards/accuracies": 0.75, "rewards/chosen": -0.1845378577709198, "rewards/margins": 0.1970101296901703, "rewards/rejected": -0.3815479874610901, "step": 240 }, { "epoch": 0.05, "learning_rate": 2.380952380952381e-06, "logits/chosen": -1.6525847911834717, "logits/rejected": -1.0854707956314087, "logps/chosen": -660.649169921875, "logps/rejected": -1140.14306640625, "loss": 0.1494, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15628325939178467, "rewards/margins": 0.19183820486068726, "rewards/rejected": -0.34812143445014954, "step": 250 }, { "epoch": 0.05, "learning_rate": 2.4761904761904764e-06, "logits/chosen": -1.6423368453979492, "logits/rejected": -1.3389235734939575, "logps/chosen": -474.299560546875, "logps/rejected": -1021.30810546875, "loss": 0.1047, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07896876335144043, "rewards/margins": 0.18711309134960175, "rewards/rejected": -0.26608186960220337, "step": 260 }, { "epoch": 0.05, "learning_rate": 2.571428571428571e-06, "logits/chosen": -1.7679646015167236, "logits/rejected": -1.022589087486267, "logps/chosen": -573.9923706054688, "logps/rejected": -1240.5599365234375, "loss": 0.0823, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.144535630941391, "rewards/margins": 0.25096073746681213, "rewards/rejected": -0.3954963684082031, "step": 270 }, { "epoch": 0.05, "learning_rate": 2.666666666666667e-06, "logits/chosen": -1.5805046558380127, "logits/rejected": -1.2209659814834595, "logps/chosen": -689.5147705078125, "logps/rejected": -1180.4815673828125, "loss": 0.1223, "rewards/accuracies": 0.75, "rewards/chosen": -0.21097099781036377, "rewards/margins": 0.2033153772354126, "rewards/rejected": -0.41428643465042114, "step": 280 }, { "epoch": 0.06, "learning_rate": 2.7619047619047625e-06, "logits/chosen": -1.4988248348236084, "logits/rejected": -1.3288917541503906, "logps/chosen": -654.6685791015625, "logps/rejected": -1351.736083984375, "loss": 0.0885, "rewards/accuracies": 0.875, "rewards/chosen": -0.1968156397342682, "rewards/margins": 0.26081275939941406, "rewards/rejected": -0.4576283395290375, "step": 290 }, { "epoch": 0.06, "learning_rate": 2.8571428571428573e-06, "logits/chosen": -1.8760350942611694, "logits/rejected": -1.1818573474884033, "logps/chosen": -712.5704345703125, "logps/rejected": -1270.919677734375, "loss": 0.0943, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20824570953845978, "rewards/margins": 0.23740389943122864, "rewards/rejected": -0.4456496238708496, "step": 300 }, { "epoch": 0.06, "learning_rate": 2.9523809523809525e-06, "logits/chosen": -1.7160227298736572, "logits/rejected": -1.109726905822754, "logps/chosen": -548.7153930664062, "logps/rejected": -1191.7733154296875, "loss": 0.0857, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15231922268867493, "rewards/margins": 0.2784896790981293, "rewards/rejected": -0.4308088719844818, "step": 310 }, { "epoch": 0.06, "learning_rate": 3.047619047619048e-06, "logits/chosen": -1.7121608257293701, "logits/rejected": -1.1865103244781494, "logps/chosen": -561.9929809570312, "logps/rejected": -1196.7623291015625, "loss": 0.0665, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15775255858898163, "rewards/margins": 0.2596059739589691, "rewards/rejected": -0.41735848784446716, "step": 320 }, { "epoch": 0.06, "learning_rate": 3.142857142857143e-06, "logits/chosen": -1.7223520278930664, "logits/rejected": -1.1820757389068604, "logps/chosen": -541.658447265625, "logps/rejected": -1158.8157958984375, "loss": 0.0664, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.12493345886468887, "rewards/margins": 0.25119608640670776, "rewards/rejected": -0.3761295676231384, "step": 330 }, { "epoch": 0.06, "learning_rate": 3.2380952380952385e-06, "logits/chosen": -1.5452698469161987, "logits/rejected": -0.9967746734619141, "logps/chosen": -682.1392822265625, "logps/rejected": -1289.32421875, "loss": 0.0913, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1961658000946045, "rewards/margins": 0.248531773686409, "rewards/rejected": -0.44469761848449707, "step": 340 }, { "epoch": 0.07, "learning_rate": 3.3333333333333333e-06, "logits/chosen": -1.4716678857803345, "logits/rejected": -1.3033275604248047, "logps/chosen": -558.1559448242188, "logps/rejected": -1119.2779541015625, "loss": 0.1114, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11863617599010468, "rewards/margins": 0.18471702933311462, "rewards/rejected": -0.3033532202243805, "step": 350 }, { "epoch": 0.07, "learning_rate": 3.428571428571429e-06, "logits/chosen": -1.5965025424957275, "logits/rejected": -1.1964014768600464, "logps/chosen": -656.2760009765625, "logps/rejected": -1180.3507080078125, "loss": 0.0883, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16832932829856873, "rewards/margins": 0.21959540247917175, "rewards/rejected": -0.3879247307777405, "step": 360 }, { "epoch": 0.07, "learning_rate": 3.523809523809524e-06, "logits/chosen": -1.6595999002456665, "logits/rejected": -1.197110891342163, "logps/chosen": -491.074462890625, "logps/rejected": -1110.8392333984375, "loss": 0.0918, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1459522545337677, "rewards/margins": 0.24827821552753448, "rewards/rejected": -0.3942304849624634, "step": 370 }, { "epoch": 0.07, "learning_rate": 3.6190476190476194e-06, "logits/chosen": -1.662347435951233, "logits/rejected": -1.183836579322815, "logps/chosen": -535.8385620117188, "logps/rejected": -1255.84765625, "loss": 0.0572, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.09289667755365372, "rewards/margins": 0.2889617085456848, "rewards/rejected": -0.38185834884643555, "step": 380 }, { "epoch": 0.07, "learning_rate": 3.7142857142857146e-06, "logits/chosen": -1.734784722328186, "logits/rejected": -1.2055104970932007, "logps/chosen": -504.5662536621094, "logps/rejected": -1199.697021484375, "loss": 0.0805, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11345423758029938, "rewards/margins": 0.2887258231639862, "rewards/rejected": -0.4021800458431244, "step": 390 }, { "epoch": 0.08, "learning_rate": 3.80952380952381e-06, "logits/chosen": -1.8762212991714478, "logits/rejected": -0.9481765031814575, "logps/chosen": -750.0078125, "logps/rejected": -1295.263671875, "loss": 0.0862, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2424238920211792, "rewards/margins": 0.22880926728248596, "rewards/rejected": -0.47123318910598755, "step": 400 }, { "epoch": 0.08, "learning_rate": 3.9047619047619055e-06, "logits/chosen": -1.7143110036849976, "logits/rejected": -1.2198104858398438, "logps/chosen": -701.7123413085938, "logps/rejected": -1305.44140625, "loss": 0.0971, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2008996307849884, "rewards/margins": 0.26313668489456177, "rewards/rejected": -0.46403631567955017, "step": 410 }, { "epoch": 0.08, "learning_rate": 4.000000000000001e-06, "logits/chosen": -1.71208918094635, "logits/rejected": -1.0226839780807495, "logps/chosen": -600.1834716796875, "logps/rejected": -1139.7694091796875, "loss": 0.0848, "rewards/accuracies": 0.75, "rewards/chosen": -0.12446846067905426, "rewards/margins": 0.2344471663236618, "rewards/rejected": -0.35891565680503845, "step": 420 }, { "epoch": 0.08, "learning_rate": 4.095238095238096e-06, "logits/chosen": -1.7601484060287476, "logits/rejected": -1.099973201751709, "logps/chosen": -627.597412109375, "logps/rejected": -1246.977783203125, "loss": 0.0753, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.11978325992822647, "rewards/margins": 0.25477010011672974, "rewards/rejected": -0.3745533227920532, "step": 430 }, { "epoch": 0.08, "learning_rate": 4.190476190476191e-06, "logits/chosen": -1.435619592666626, "logits/rejected": -0.9720669984817505, "logps/chosen": -753.2772827148438, "logps/rejected": -1327.572265625, "loss": 0.0828, "rewards/accuracies": 0.875, "rewards/chosen": -0.26884615421295166, "rewards/margins": 0.23727090656757355, "rewards/rejected": -0.5061171054840088, "step": 440 }, { "epoch": 0.09, "learning_rate": 4.2857142857142855e-06, "logits/chosen": -1.5603129863739014, "logits/rejected": -1.0288165807724, "logps/chosen": -777.147705078125, "logps/rejected": -1493.787841796875, "loss": 0.0811, "rewards/accuracies": 0.875, "rewards/chosen": -0.32021015882492065, "rewards/margins": 0.3157784640789032, "rewards/rejected": -0.6359886527061462, "step": 450 }, { "epoch": 0.09, "learning_rate": 4.3809523809523815e-06, "logits/chosen": -1.552169919013977, "logits/rejected": -1.063795566558838, "logps/chosen": -634.6558837890625, "logps/rejected": -1188.177490234375, "loss": 0.1036, "rewards/accuracies": 0.75, "rewards/chosen": -0.21501021087169647, "rewards/margins": 0.208788201212883, "rewards/rejected": -0.42379841208457947, "step": 460 }, { "epoch": 0.09, "learning_rate": 4.476190476190477e-06, "logits/chosen": -1.8907029628753662, "logits/rejected": -1.0847430229187012, "logps/chosen": -527.0374755859375, "logps/rejected": -1173.001953125, "loss": 0.0646, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.08367784321308136, "rewards/margins": 0.2584603428840637, "rewards/rejected": -0.3421381413936615, "step": 470 }, { "epoch": 0.09, "learning_rate": 4.571428571428572e-06, "logits/chosen": -1.7828342914581299, "logits/rejected": -1.354624629020691, "logps/chosen": -562.9432373046875, "logps/rejected": -1167.8656005859375, "loss": 0.0815, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1048797145485878, "rewards/margins": 0.2481795847415924, "rewards/rejected": -0.353059321641922, "step": 480 }, { "epoch": 0.09, "learning_rate": 4.666666666666667e-06, "logits/chosen": -2.050565004348755, "logits/rejected": -1.307130217552185, "logps/chosen": -652.1871337890625, "logps/rejected": -1257.9901123046875, "loss": 0.061, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15454623103141785, "rewards/margins": 0.2749207019805908, "rewards/rejected": -0.4294669032096863, "step": 490 }, { "epoch": 0.1, "learning_rate": 4.761904761904762e-06, "logits/chosen": -1.7903814315795898, "logits/rejected": -1.102508783340454, "logps/chosen": -714.9098510742188, "logps/rejected": -1366.354248046875, "loss": 0.0709, "rewards/accuracies": 0.875, "rewards/chosen": -0.18693551421165466, "rewards/margins": 0.3030626177787781, "rewards/rejected": -0.4899981617927551, "step": 500 }, { "epoch": 0.1, "learning_rate": 4.857142857142858e-06, "logits/chosen": -1.5884782075881958, "logits/rejected": -1.0900219678878784, "logps/chosen": -889.3687744140625, "logps/rejected": -1551.3133544921875, "loss": 0.1114, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.31739160418510437, "rewards/margins": 0.2541782259941101, "rewards/rejected": -0.5715699195861816, "step": 510 }, { "epoch": 0.1, "learning_rate": 4.952380952380953e-06, "logits/chosen": -1.917160987854004, "logits/rejected": -1.2584677934646606, "logps/chosen": -599.8109130859375, "logps/rejected": -1112.045166015625, "loss": 0.1099, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1927962303161621, "rewards/margins": 0.21344804763793945, "rewards/rejected": -0.40624427795410156, "step": 520 }, { "epoch": 0.1, "learning_rate": 4.999986185163754e-06, "logits/chosen": -1.6736475229263306, "logits/rejected": -1.1986761093139648, "logps/chosen": -646.7384033203125, "logps/rejected": -1181.8050537109375, "loss": 0.0981, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1889500617980957, "rewards/margins": 0.21725177764892578, "rewards/rejected": -0.4062018394470215, "step": 530 }, { "epoch": 0.1, "learning_rate": 4.999875667389858e-06, "logits/chosen": -1.7721306085586548, "logits/rejected": -1.1803306341171265, "logps/chosen": -586.8339233398438, "logps/rejected": -1139.8045654296875, "loss": 0.1072, "rewards/accuracies": 0.75, "rewards/chosen": -0.14554569125175476, "rewards/margins": 0.23414616286754608, "rewards/rejected": -0.3796918988227844, "step": 540 }, { "epoch": 0.1, "learning_rate": 4.999654636727765e-06, "logits/chosen": -1.6037023067474365, "logits/rejected": -1.2241770029067993, "logps/chosen": -585.2470092773438, "logps/rejected": -1193.3209228515625, "loss": 0.1007, "rewards/accuracies": 0.75, "rewards/chosen": -0.13885599374771118, "rewards/margins": 0.228451207280159, "rewards/rejected": -0.36730721592903137, "step": 550 }, { "epoch": 0.11, "learning_rate": 4.999323102948655e-06, "logits/chosen": -1.4847551584243774, "logits/rejected": -1.0340732336044312, "logps/chosen": -661.06103515625, "logps/rejected": -1281.92431640625, "loss": 0.1014, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20873567461967468, "rewards/margins": 0.2781946659088135, "rewards/rejected": -0.48693031072616577, "step": 560 }, { "epoch": 0.11, "learning_rate": 4.998881080708759e-06, "logits/chosen": -1.5017633438110352, "logits/rejected": -1.003154993057251, "logps/chosen": -746.7311401367188, "logps/rejected": -1284.123779296875, "loss": 0.1062, "rewards/accuracies": 0.875, "rewards/chosen": -0.2704651951789856, "rewards/margins": 0.23320093750953674, "rewards/rejected": -0.5036661028862, "step": 570 }, { "epoch": 0.11, "learning_rate": 4.998328589548711e-06, "logits/chosen": -1.5699012279510498, "logits/rejected": -1.1782718896865845, "logps/chosen": -710.3646850585938, "logps/rejected": -1333.047119140625, "loss": 0.0741, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.24391067028045654, "rewards/margins": 0.2689984440803528, "rewards/rejected": -0.5129091143608093, "step": 580 }, { "epoch": 0.11, "learning_rate": 4.997665653892682e-06, "logits/chosen": -1.7179704904556274, "logits/rejected": -0.9554191827774048, "logps/chosen": -799.9317626953125, "logps/rejected": -1362.622802734375, "loss": 0.0745, "rewards/accuracies": 0.875, "rewards/chosen": -0.3002550005912781, "rewards/margins": 0.29512229561805725, "rewards/rejected": -0.5953773260116577, "step": 590 }, { "epoch": 0.11, "learning_rate": 4.996892303047306e-06, "logits/chosen": -1.586641550064087, "logits/rejected": -0.9480462074279785, "logps/chosen": -888.384765625, "logps/rejected": -1318.6873779296875, "loss": 0.112, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3719322383403778, "rewards/margins": 0.19167180359363556, "rewards/rejected": -0.5636041164398193, "step": 600 }, { "epoch": 0.12, "learning_rate": 4.996008571200375e-06, "logits/chosen": -1.5255849361419678, "logits/rejected": -1.0800695419311523, "logps/chosen": -759.3140869140625, "logps/rejected": -1329.3663330078125, "loss": 0.117, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.269595205783844, "rewards/margins": 0.23617248237133026, "rewards/rejected": -0.5057677030563354, "step": 610 }, { "epoch": 0.12, "learning_rate": 4.995014497419336e-06, "logits/chosen": -1.473812222480774, "logits/rejected": -1.122639775276184, "logps/chosen": -585.0800170898438, "logps/rejected": -1056.879638671875, "loss": 0.1179, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17191004753112793, "rewards/margins": 0.15805818140506744, "rewards/rejected": -0.3299682140350342, "step": 620 }, { "epoch": 0.12, "learning_rate": 4.993910125649561e-06, "logits/chosen": -1.4959099292755127, "logits/rejected": -0.9445433616638184, "logps/chosen": -615.6098022460938, "logps/rejected": -1139.494873046875, "loss": 0.0918, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1279982626438141, "rewards/margins": 0.24162478744983673, "rewards/rejected": -0.3696230351924896, "step": 630 }, { "epoch": 0.12, "learning_rate": 4.992695504712402e-06, "logits/chosen": -1.4863700866699219, "logits/rejected": -1.1100517511367798, "logps/chosen": -520.9189453125, "logps/rejected": -1139.673095703125, "loss": 0.1111, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13522864878177643, "rewards/margins": 0.2550002932548523, "rewards/rejected": -0.39022889733314514, "step": 640 }, { "epoch": 0.12, "learning_rate": 4.9913706883030385e-06, "logits/chosen": -1.688804268836975, "logits/rejected": -1.2719508409500122, "logps/chosen": -706.3018798828125, "logps/rejected": -1093.7733154296875, "loss": 0.1174, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23440298438072205, "rewards/margins": 0.16613098978996277, "rewards/rejected": -0.4005340039730072, "step": 650 }, { "epoch": 0.13, "learning_rate": 4.989935734988098e-06, "logits/chosen": -1.6558220386505127, "logits/rejected": -1.0028841495513916, "logps/chosen": -598.4909057617188, "logps/rejected": -1052.578857421875, "loss": 0.1242, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18583187460899353, "rewards/margins": 0.2180318385362625, "rewards/rejected": -0.40386366844177246, "step": 660 }, { "epoch": 0.13, "learning_rate": 4.988390708203068e-06, "logits/chosen": -1.7862279415130615, "logits/rejected": -1.118517518043518, "logps/chosen": -668.9701538085938, "logps/rejected": -1353.1531982421875, "loss": 0.0559, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1963638961315155, "rewards/margins": 0.30444028973579407, "rewards/rejected": -0.5008042454719543, "step": 670 }, { "epoch": 0.13, "learning_rate": 4.9867356762494955e-06, "logits/chosen": -1.5889132022857666, "logits/rejected": -1.1245874166488647, "logps/chosen": -745.0137939453125, "logps/rejected": -1370.260009765625, "loss": 0.0932, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19919827580451965, "rewards/margins": 0.26395368576049805, "rewards/rejected": -0.4631519317626953, "step": 680 }, { "epoch": 0.13, "learning_rate": 4.984970712291963e-06, "logits/chosen": -1.7236887216567993, "logits/rejected": -0.9385706782341003, "logps/chosen": -698.5767822265625, "logps/rejected": -1253.93994140625, "loss": 0.0922, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1614110916852951, "rewards/margins": 0.2871715724468231, "rewards/rejected": -0.44858264923095703, "step": 690 }, { "epoch": 0.13, "learning_rate": 4.983095894354858e-06, "logits/chosen": -1.7446352243423462, "logits/rejected": -1.2324244976043701, "logps/chosen": -680.3492431640625, "logps/rejected": -1322.1500244140625, "loss": 0.091, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19917258620262146, "rewards/margins": 0.26733073592185974, "rewards/rejected": -0.46650344133377075, "step": 700 }, { "epoch": 0.14, "learning_rate": 4.981111305318918e-06, "logits/chosen": -1.4915651082992554, "logits/rejected": -0.997687816619873, "logps/chosen": -652.5929565429688, "logps/rejected": -1232.224365234375, "loss": 0.1035, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21357397735118866, "rewards/margins": 0.23794837296009064, "rewards/rejected": -0.4515223503112793, "step": 710 }, { "epoch": 0.14, "learning_rate": 4.979017032917576e-06, "logits/chosen": -1.93303644657135, "logits/rejected": -1.1569701433181763, "logps/chosen": -580.7186279296875, "logps/rejected": -1183.568115234375, "loss": 0.0613, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1580665409564972, "rewards/margins": 0.2544881999492645, "rewards/rejected": -0.41255468130111694, "step": 720 }, { "epoch": 0.14, "learning_rate": 4.97681316973307e-06, "logits/chosen": -1.5416204929351807, "logits/rejected": -1.1237828731536865, "logps/chosen": -565.6384887695312, "logps/rejected": -1154.13134765625, "loss": 0.112, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14230427145957947, "rewards/margins": 0.2524174153804779, "rewards/rejected": -0.394721657037735, "step": 730 }, { "epoch": 0.14, "learning_rate": 4.9744998131923625e-06, "logits/chosen": -1.6045395135879517, "logits/rejected": -1.0634424686431885, "logps/chosen": -734.4767456054688, "logps/rejected": -1465.661376953125, "loss": 0.0922, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.26009348034858704, "rewards/margins": 0.2871132493019104, "rewards/rejected": -0.5472067594528198, "step": 740 }, { "epoch": 0.14, "learning_rate": 4.9720770655628216e-06, "logits/chosen": -1.8270715475082397, "logits/rejected": -1.260059118270874, "logps/chosen": -715.5145263671875, "logps/rejected": -1226.204833984375, "loss": 0.1144, "rewards/accuracies": 0.75, "rewards/chosen": -0.18080273270606995, "rewards/margins": 0.2042236328125, "rewards/rejected": -0.38502639532089233, "step": 750 }, { "epoch": 0.14, "learning_rate": 4.969545033947711e-06, "logits/chosen": -1.673006296157837, "logits/rejected": -1.180161952972412, "logps/chosen": -606.9016723632812, "logps/rejected": -1316.323486328125, "loss": 0.0723, "rewards/accuracies": 0.875, "rewards/chosen": -0.1526075303554535, "rewards/margins": 0.26150739192962646, "rewards/rejected": -0.41411495208740234, "step": 760 }, { "epoch": 0.15, "learning_rate": 4.966903830281449e-06, "logits/chosen": -1.9988367557525635, "logits/rejected": -1.309494972229004, "logps/chosen": -557.9260864257812, "logps/rejected": -1146.306640625, "loss": 0.0867, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.09266151487827301, "rewards/margins": 0.2640661597251892, "rewards/rejected": -0.3567276895046234, "step": 770 }, { "epoch": 0.15, "learning_rate": 4.964153571324658e-06, "logits/chosen": -1.978520154953003, "logits/rejected": -1.1246730089187622, "logps/chosen": -589.2442626953125, "logps/rejected": -954.0111083984375, "loss": 0.1111, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1084524616599083, "rewards/margins": 0.19350787997245789, "rewards/rejected": -0.30196037888526917, "step": 780 }, { "epoch": 0.15, "learning_rate": 4.96129437865901e-06, "logits/chosen": -1.7288103103637695, "logits/rejected": -1.1625417470932007, "logps/chosen": -582.9478759765625, "logps/rejected": -1196.01318359375, "loss": 0.0591, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15656381845474243, "rewards/margins": 0.2532889246940613, "rewards/rejected": -0.4098528027534485, "step": 790 }, { "epoch": 0.15, "learning_rate": 4.958326378681849e-06, "logits/chosen": -1.7046226263046265, "logits/rejected": -1.2026054859161377, "logps/chosen": -526.1422119140625, "logps/rejected": -1188.0306396484375, "loss": 0.0666, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.12230102717876434, "rewards/margins": 0.279751718044281, "rewards/rejected": -0.40205278992652893, "step": 800 }, { "epoch": 0.15, "learning_rate": 4.955249702600598e-06, "logits/chosen": -1.6546719074249268, "logits/rejected": -1.117095708847046, "logps/chosen": -624.09375, "logps/rejected": -1231.6494140625, "loss": 0.0991, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15768446028232574, "rewards/margins": 0.2528056502342224, "rewards/rejected": -0.41049009561538696, "step": 810 }, { "epoch": 0.16, "learning_rate": 4.952064486426965e-06, "logits/chosen": -1.7625147104263306, "logits/rejected": -1.1312098503112793, "logps/chosen": -638.472412109375, "logps/rejected": -1207.470458984375, "loss": 0.0717, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1054113358259201, "rewards/margins": 0.2620728611946106, "rewards/rejected": -0.3674841821193695, "step": 820 }, { "epoch": 0.16, "learning_rate": 4.948770870970929e-06, "logits/chosen": -1.8074289560317993, "logits/rejected": -1.2430999279022217, "logps/chosen": -625.0852661132812, "logps/rejected": -1263.1839599609375, "loss": 0.0759, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14085423946380615, "rewards/margins": 0.27442166209220886, "rewards/rejected": -0.415275901556015, "step": 830 }, { "epoch": 0.16, "learning_rate": 4.9453690018345144e-06, "logits/chosen": -1.782332181930542, "logits/rejected": -1.2486510276794434, "logps/chosen": -668.4853515625, "logps/rejected": -1139.117919921875, "loss": 0.1234, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1727108657360077, "rewards/margins": 0.21505430340766907, "rewards/rejected": -0.38776513934135437, "step": 840 }, { "epoch": 0.16, "learning_rate": 4.941859029405354e-06, "logits/chosen": -1.5351417064666748, "logits/rejected": -1.2106958627700806, "logps/chosen": -692.7496337890625, "logps/rejected": -1367.590087890625, "loss": 0.0951, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2019486427307129, "rewards/margins": 0.27323096990585327, "rewards/rejected": -0.47517961263656616, "step": 850 }, { "epoch": 0.16, "learning_rate": 4.938241108850039e-06, "logits/chosen": -1.7847673892974854, "logits/rejected": -0.9648265838623047, "logps/chosen": -705.1673583984375, "logps/rejected": -1294.5316162109375, "loss": 0.0712, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17169032990932465, "rewards/margins": 0.2827732563018799, "rewards/rejected": -0.45446357131004333, "step": 860 }, { "epoch": 0.17, "learning_rate": 4.934515400107266e-06, "logits/chosen": -1.717449426651001, "logits/rejected": -1.2112057209014893, "logps/chosen": -663.0195922851562, "logps/rejected": -1365.6519775390625, "loss": 0.0685, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19511958956718445, "rewards/margins": 0.2715843617916107, "rewards/rejected": -0.4667038917541504, "step": 870 }, { "epoch": 0.17, "learning_rate": 4.930682067880759e-06, "logits/chosen": -1.6452863216400146, "logits/rejected": -1.1141650676727295, "logps/chosen": -567.5109252929688, "logps/rejected": -1393.507568359375, "loss": 0.0479, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17566943168640137, "rewards/margins": 0.3386470377445221, "rewards/rejected": -0.5143164396286011, "step": 880 }, { "epoch": 0.17, "learning_rate": 4.926741281631991e-06, "logits/chosen": -1.6676323413848877, "logits/rejected": -1.2808525562286377, "logps/chosen": -630.0742797851562, "logps/rejected": -1261.5531005859375, "loss": 0.0675, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18672814965248108, "rewards/margins": 0.2657690644264221, "rewards/rejected": -0.4524971842765808, "step": 890 }, { "epoch": 0.17, "learning_rate": 4.922693215572695e-06, "logits/chosen": -1.4996672868728638, "logits/rejected": -1.0641984939575195, "logps/chosen": -514.0299072265625, "logps/rejected": -1189.6839599609375, "loss": 0.0829, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11306717246770859, "rewards/margins": 0.29115527868270874, "rewards/rejected": -0.40422242879867554, "step": 900 }, { "epoch": 0.17, "learning_rate": 4.91853804865716e-06, "logits/chosen": -1.6429615020751953, "logits/rejected": -1.2641582489013672, "logps/chosen": -637.0469360351562, "logps/rejected": -1262.9354248046875, "loss": 0.0909, "rewards/accuracies": 0.75, "rewards/chosen": -0.15128901600837708, "rewards/margins": 0.2525308430194855, "rewards/rejected": -0.40381985902786255, "step": 910 }, { "epoch": 0.18, "learning_rate": 4.91427596457432e-06, "logits/chosen": -1.745375633239746, "logits/rejected": -1.1431455612182617, "logps/chosen": -661.6976318359375, "logps/rejected": -1298.279541015625, "loss": 0.0742, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1605122685432434, "rewards/margins": 0.2791559100151062, "rewards/rejected": -0.4396681785583496, "step": 920 }, { "epoch": 0.18, "learning_rate": 4.909907151739634e-06, "logits/chosen": -1.863525152206421, "logits/rejected": -1.5125272274017334, "logps/chosen": -525.1600341796875, "logps/rejected": -1222.451171875, "loss": 0.0681, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12098580598831177, "rewards/margins": 0.295646607875824, "rewards/rejected": -0.4166324734687805, "step": 930 }, { "epoch": 0.18, "learning_rate": 4.905431803286756e-06, "logits/chosen": -1.5892441272735596, "logits/rejected": -1.2031795978546143, "logps/chosen": -558.3675537109375, "logps/rejected": -1227.478515625, "loss": 0.0882, "rewards/accuracies": 0.875, "rewards/chosen": -0.13429304957389832, "rewards/margins": 0.2827587127685547, "rewards/rejected": -0.4170517325401306, "step": 940 }, { "epoch": 0.18, "learning_rate": 4.900850117059e-06, "logits/chosen": -1.621360182762146, "logits/rejected": -1.333057165145874, "logps/chosen": -596.5543823242188, "logps/rejected": -1246.934326171875, "loss": 0.0652, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16683629155158997, "rewards/margins": 0.29126232862472534, "rewards/rejected": -0.4580985903739929, "step": 950 }, { "epoch": 0.18, "learning_rate": 4.8961622956005895e-06, "logits/chosen": -1.7013307809829712, "logits/rejected": -1.2199418544769287, "logps/chosen": -655.8325805664062, "logps/rejected": -1257.751708984375, "loss": 0.0768, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17777304351329803, "rewards/margins": 0.26354774832725525, "rewards/rejected": -0.44132083654403687, "step": 960 }, { "epoch": 0.18, "learning_rate": 4.891368546147707e-06, "logits/chosen": -1.7467527389526367, "logits/rejected": -1.349560022354126, "logps/chosen": -574.1756591796875, "logps/rejected": -1149.5211181640625, "loss": 0.1057, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14399586617946625, "rewards/margins": 0.26451900601387024, "rewards/rejected": -0.4085148870944977, "step": 970 }, { "epoch": 0.19, "learning_rate": 4.88646908061933e-06, "logits/chosen": -1.8687814474105835, "logits/rejected": -1.1187694072723389, "logps/chosen": -652.5084228515625, "logps/rejected": -1175.923095703125, "loss": 0.0937, "rewards/accuracies": 0.75, "rewards/chosen": -0.13137920200824738, "rewards/margins": 0.2375623881816864, "rewards/rejected": -0.36894160509109497, "step": 980 }, { "epoch": 0.19, "learning_rate": 4.881464115607866e-06, "logits/chosen": -1.7792987823486328, "logits/rejected": -1.0951414108276367, "logps/chosen": -598.0963134765625, "logps/rejected": -1200.380615234375, "loss": 0.0843, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.08524195849895477, "rewards/margins": 0.26355254650115967, "rewards/rejected": -0.34879451990127563, "step": 990 }, { "epoch": 0.19, "learning_rate": 4.876353872369573e-06, "logits/chosen": -1.8199819326400757, "logits/rejected": -1.1733105182647705, "logps/chosen": -558.3444213867188, "logps/rejected": -1186.50439453125, "loss": 0.0714, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.05597637966275215, "rewards/margins": 0.25787559151649475, "rewards/rejected": -0.3138519823551178, "step": 1000 }, { "epoch": 0.19, "learning_rate": 4.871138576814782e-06, "logits/chosen": -1.679121971130371, "logits/rejected": -1.1686570644378662, "logps/chosen": -554.617919921875, "logps/rejected": -1218.363037109375, "loss": 0.0873, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.09123705327510834, "rewards/margins": 0.2742183804512024, "rewards/rejected": -0.3654554486274719, "step": 1010 }, { "epoch": 0.19, "learning_rate": 4.865818459497911e-06, "logits/chosen": -1.646142601966858, "logits/rejected": -1.087127923965454, "logps/chosen": -595.8470458984375, "logps/rejected": -1093.2322998046875, "loss": 0.1123, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16172102093696594, "rewards/margins": 0.21603676676750183, "rewards/rejected": -0.3777577877044678, "step": 1020 }, { "epoch": 0.2, "learning_rate": 4.860393755607266e-06, "logits/chosen": -1.522953748703003, "logits/rejected": -1.0482865571975708, "logps/chosen": -692.7568969726562, "logps/rejected": -1367.2935791015625, "loss": 0.1043, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24981939792633057, "rewards/margins": 0.28347522020339966, "rewards/rejected": -0.5332946181297302, "step": 1030 }, { "epoch": 0.2, "learning_rate": 4.854864704954654e-06, "logits/chosen": -1.4114348888397217, "logits/rejected": -1.1013247966766357, "logps/chosen": -734.9617309570312, "logps/rejected": -1330.0992431640625, "loss": 0.077, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2645266354084015, "rewards/margins": 0.23836931586265564, "rewards/rejected": -0.5028959512710571, "step": 1040 }, { "epoch": 0.2, "learning_rate": 4.849231551964771e-06, "logits/chosen": -1.7310336828231812, "logits/rejected": -0.9586830139160156, "logps/chosen": -699.4583740234375, "logps/rejected": -1246.3502197265625, "loss": 0.0698, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16729851067066193, "rewards/margins": 0.28657031059265137, "rewards/rejected": -0.4538688659667969, "step": 1050 }, { "epoch": 0.2, "learning_rate": 4.843494545664407e-06, "logits/chosen": -1.3404247760772705, "logits/rejected": -0.7918633818626404, "logps/chosen": -588.2305908203125, "logps/rejected": -1199.187744140625, "loss": 0.0801, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1429479569196701, "rewards/margins": 0.30188634991645813, "rewards/rejected": -0.44483429193496704, "step": 1060 }, { "epoch": 0.2, "learning_rate": 4.837653939671427e-06, "logits/chosen": -1.6853176355361938, "logits/rejected": -0.9261191487312317, "logps/chosen": -739.8435668945312, "logps/rejected": -1302.793701171875, "loss": 0.1033, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2311195582151413, "rewards/margins": 0.2652646601200104, "rewards/rejected": -0.49638423323631287, "step": 1070 }, { "epoch": 0.21, "learning_rate": 4.8317099921835695e-06, "logits/chosen": -1.4624329805374146, "logits/rejected": -1.0136330127716064, "logps/chosen": -571.1947631835938, "logps/rejected": -1336.320556640625, "loss": 0.067, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1917448490858078, "rewards/margins": 0.34521591663360596, "rewards/rejected": -0.536960780620575, "step": 1080 }, { "epoch": 0.21, "learning_rate": 4.825662965967023e-06, "logits/chosen": -1.6408259868621826, "logits/rejected": -0.9534978866577148, "logps/chosen": -721.38134765625, "logps/rejected": -1417.104736328125, "loss": 0.0623, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19875575602054596, "rewards/margins": 0.3017219007015228, "rewards/rejected": -0.50047767162323, "step": 1090 }, { "epoch": 0.21, "learning_rate": 4.819513128344814e-06, "logits/chosen": -1.853732705116272, "logits/rejected": -0.9827998280525208, "logps/chosen": -554.1961669921875, "logps/rejected": -1199.3424072265625, "loss": 0.0617, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.12518128752708435, "rewards/margins": 0.2828952968120575, "rewards/rejected": -0.40807658433914185, "step": 1100 }, { "epoch": 0.21, "learning_rate": 4.813260751184992e-06, "logits/chosen": -1.7140512466430664, "logits/rejected": -1.5034291744232178, "logps/chosen": -469.31536865234375, "logps/rejected": -1079.330810546875, "loss": 0.1014, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11440832912921906, "rewards/margins": 0.23391124606132507, "rewards/rejected": -0.34831956028938293, "step": 1110 }, { "epoch": 0.21, "learning_rate": 4.806906110888606e-06, "logits/chosen": -1.8567192554473877, "logits/rejected": -1.186836838722229, "logps/chosen": -660.9390258789062, "logps/rejected": -1327.2064208984375, "loss": 0.0649, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.17495737969875336, "rewards/margins": 0.3085317015647888, "rewards/rejected": -0.48348909616470337, "step": 1120 }, { "epoch": 0.22, "learning_rate": 4.8004494883774885e-06, "logits/chosen": -1.629417061805725, "logits/rejected": -1.1237983703613281, "logps/chosen": -658.7716674804688, "logps/rejected": -1356.76220703125, "loss": 0.075, "rewards/accuracies": 0.875, "rewards/chosen": -0.21607021987438202, "rewards/margins": 0.29712003469467163, "rewards/rejected": -0.5131902694702148, "step": 1130 }, { "epoch": 0.22, "learning_rate": 4.793891169081835e-06, "logits/chosen": -1.5540090799331665, "logits/rejected": -1.0890731811523438, "logps/chosen": -597.20654296875, "logps/rejected": -1241.4404296875, "loss": 0.0756, "rewards/accuracies": 0.875, "rewards/chosen": -0.1598585844039917, "rewards/margins": 0.2764621376991272, "rewards/rejected": -0.4363207221031189, "step": 1140 }, { "epoch": 0.22, "learning_rate": 4.787231442927587e-06, "logits/chosen": -1.7837997674942017, "logits/rejected": -1.0015078783035278, "logps/chosen": -753.8917846679688, "logps/rejected": -1246.1300048828125, "loss": 0.0881, "rewards/accuracies": 0.75, "rewards/chosen": -0.18574762344360352, "rewards/margins": 0.2450966089963913, "rewards/rejected": -0.430844247341156, "step": 1150 }, { "epoch": 0.22, "learning_rate": 4.780470604323616e-06, "logits/chosen": -1.8322811126708984, "logits/rejected": -1.0309690237045288, "logps/chosen": -657.1239013671875, "logps/rejected": -1257.453857421875, "loss": 0.0762, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.09944574534893036, "rewards/margins": 0.27715909481048584, "rewards/rejected": -0.3766048550605774, "step": 1160 }, { "epoch": 0.22, "learning_rate": 4.773608952148706e-06, "logits/chosen": -1.8025999069213867, "logits/rejected": -1.1409608125686646, "logps/chosen": -575.5861206054688, "logps/rejected": -1183.916259765625, "loss": 0.075, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.09301136434078217, "rewards/margins": 0.26918086409568787, "rewards/rejected": -0.36219221353530884, "step": 1170 }, { "epoch": 0.22, "learning_rate": 4.766646789738342e-06, "logits/chosen": -1.7869154214859009, "logits/rejected": -1.3070907592773438, "logps/chosen": -539.0377197265625, "logps/rejected": -1107.183837890625, "loss": 0.0632, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.0965101346373558, "rewards/margins": 0.27286410331726074, "rewards/rejected": -0.36937421560287476, "step": 1180 }, { "epoch": 0.23, "learning_rate": 4.759584424871302e-06, "logits/chosen": -1.773685097694397, "logits/rejected": -1.0828640460968018, "logps/chosen": -565.7281494140625, "logps/rejected": -1151.1832275390625, "loss": 0.068, "rewards/accuracies": 0.875, "rewards/chosen": -0.11723196506500244, "rewards/margins": 0.280317485332489, "rewards/rejected": -0.39754948019981384, "step": 1190 }, { "epoch": 0.23, "learning_rate": 4.752422169756048e-06, "logits/chosen": -1.7946689128875732, "logits/rejected": -1.1525065898895264, "logps/chosen": -652.74169921875, "logps/rejected": -1309.117919921875, "loss": 0.0831, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17318835854530334, "rewards/margins": 0.3087579309940338, "rewards/rejected": -0.48194631934165955, "step": 1200 }, { "epoch": 0.23, "learning_rate": 4.745160341016927e-06, "logits/chosen": -1.865501046180725, "logits/rejected": -1.3471348285675049, "logps/chosen": -731.3184814453125, "logps/rejected": -1188.284423828125, "loss": 0.0972, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20365187525749207, "rewards/margins": 0.21393127739429474, "rewards/rejected": -0.417583167552948, "step": 1210 }, { "epoch": 0.23, "learning_rate": 4.737799259680172e-06, "logits/chosen": -1.538688063621521, "logits/rejected": -1.0417388677597046, "logps/chosen": -677.1107177734375, "logps/rejected": -1417.5760498046875, "loss": 0.0571, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2299385517835617, "rewards/margins": 0.3572830557823181, "rewards/rejected": -0.5872215628623962, "step": 1220 }, { "epoch": 0.23, "learning_rate": 4.730339251159709e-06, "logits/chosen": -1.6450903415679932, "logits/rejected": -1.0749512910842896, "logps/chosen": -807.4049072265625, "logps/rejected": -1535.035400390625, "loss": 0.0757, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.25353971123695374, "rewards/margins": 0.3195953071117401, "rewards/rejected": -0.5731350183486938, "step": 1230 }, { "epoch": 0.24, "learning_rate": 4.722780645242775e-06, "logits/chosen": -1.4446080923080444, "logits/rejected": -0.9962978363037109, "logps/chosen": -559.8865966796875, "logps/rejected": -1117.860107421875, "loss": 0.0944, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1235518679022789, "rewards/margins": 0.2574634253978729, "rewards/rejected": -0.3810153305530548, "step": 1240 }, { "epoch": 0.24, "learning_rate": 4.715123776075337e-06, "logits/chosen": -1.5536324977874756, "logits/rejected": -0.9707902669906616, "logps/chosen": -623.35302734375, "logps/rejected": -1270.5560302734375, "loss": 0.0802, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16853728890419006, "rewards/margins": 0.2629413604736328, "rewards/rejected": -0.43147867918014526, "step": 1250 }, { "epoch": 0.24, "learning_rate": 4.707368982147318e-06, "logits/chosen": -1.609861969947815, "logits/rejected": -1.2645909786224365, "logps/chosen": -731.3851318359375, "logps/rejected": -1379.0576171875, "loss": 0.0645, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2083616703748703, "rewards/margins": 0.3054686188697815, "rewards/rejected": -0.513830304145813, "step": 1260 }, { "epoch": 0.24, "learning_rate": 4.699516606277638e-06, "logits/chosen": -1.5661576986312866, "logits/rejected": -1.0933661460876465, "logps/chosen": -635.5277099609375, "logps/rejected": -1325.19384765625, "loss": 0.0897, "rewards/accuracies": 0.875, "rewards/chosen": -0.16789881885051727, "rewards/margins": 0.31640955805778503, "rewards/rejected": -0.4843083322048187, "step": 1270 }, { "epoch": 0.24, "learning_rate": 4.691566995599056e-06, "logits/chosen": -1.6782423257827759, "logits/rejected": -1.106381893157959, "logps/chosen": -651.0316772460938, "logps/rejected": -1290.894775390625, "loss": 0.0708, "rewards/accuracies": 0.875, "rewards/chosen": -0.1785011887550354, "rewards/margins": 0.28199997544288635, "rewards/rejected": -0.46050113439559937, "step": 1280 }, { "epoch": 0.25, "learning_rate": 4.683520501542825e-06, "logits/chosen": -1.6150267124176025, "logits/rejected": -1.025458812713623, "logps/chosen": -732.90185546875, "logps/rejected": -1455.954833984375, "loss": 0.0649, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.24051375687122345, "rewards/margins": 0.326476514339447, "rewards/rejected": -0.5669902563095093, "step": 1290 }, { "epoch": 0.25, "learning_rate": 4.675377479823153e-06, "logits/chosen": -1.629797339439392, "logits/rejected": -1.152362585067749, "logps/chosen": -604.6630859375, "logps/rejected": -1252.499755859375, "loss": 0.0777, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15616537630558014, "rewards/margins": 0.296053022146225, "rewards/rejected": -0.4522184431552887, "step": 1300 }, { "epoch": 0.25, "learning_rate": 4.667138290421483e-06, "logits/chosen": -1.421653151512146, "logits/rejected": -0.8815647959709167, "logps/chosen": -626.5021362304688, "logps/rejected": -1467.975830078125, "loss": 0.0501, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.18186897039413452, "rewards/margins": 0.39172568917274475, "rewards/rejected": -0.5735946893692017, "step": 1310 }, { "epoch": 0.25, "learning_rate": 4.658803297570578e-06, "logits/chosen": -1.4821852445602417, "logits/rejected": -1.1698157787322998, "logps/chosen": -730.8109741210938, "logps/rejected": -1356.880615234375, "loss": 0.0802, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2391328066587448, "rewards/margins": 0.26806876063346863, "rewards/rejected": -0.507201611995697, "step": 1320 }, { "epoch": 0.25, "learning_rate": 4.650372869738415e-06, "logits/chosen": -1.674965262413025, "logits/rejected": -1.0977269411087036, "logps/chosen": -595.2286987304688, "logps/rejected": -1290.0439453125, "loss": 0.059, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17417269945144653, "rewards/margins": 0.31828632950782776, "rewards/rejected": -0.4924590587615967, "step": 1330 }, { "epoch": 0.26, "learning_rate": 4.641847379611898e-06, "logits/chosen": -1.5804319381713867, "logits/rejected": -0.8383885622024536, "logps/chosen": -729.4918823242188, "logps/rejected": -1354.4481201171875, "loss": 0.0658, "rewards/accuracies": 0.875, "rewards/chosen": -0.24894657731056213, "rewards/margins": 0.2908107042312622, "rewards/rejected": -0.5397573709487915, "step": 1340 }, { "epoch": 0.26, "learning_rate": 4.633227204080389e-06, "logits/chosen": -1.4710257053375244, "logits/rejected": -0.9187448620796204, "logps/chosen": -717.806396484375, "logps/rejected": -1383.8284912109375, "loss": 0.0692, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22807364165782928, "rewards/margins": 0.3141343891620636, "rewards/rejected": -0.5422079563140869, "step": 1350 }, { "epoch": 0.26, "learning_rate": 4.624512724219038e-06, "logits/chosen": -1.8117561340332031, "logits/rejected": -1.174837589263916, "logps/chosen": -577.433349609375, "logps/rejected": -1249.8837890625, "loss": 0.0789, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16873693466186523, "rewards/margins": 0.3111843168735504, "rewards/rejected": -0.47992125153541565, "step": 1360 }, { "epoch": 0.26, "learning_rate": 4.6157043252719374e-06, "logits/chosen": -1.5913151502609253, "logits/rejected": -1.1274433135986328, "logps/chosen": -622.65380859375, "logps/rejected": -1108.6986083984375, "loss": 0.0986, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20124728977680206, "rewards/margins": 0.2101028710603714, "rewards/rejected": -0.41135016083717346, "step": 1370 }, { "epoch": 0.26, "learning_rate": 4.606802396635098e-06, "logits/chosen": -1.5497840642929077, "logits/rejected": -1.1788917779922485, "logps/chosen": -500.24700927734375, "logps/rejected": -1297.15966796875, "loss": 0.0654, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1291418969631195, "rewards/margins": 0.3215458393096924, "rewards/rejected": -0.4506877362728119, "step": 1380 }, { "epoch": 0.26, "learning_rate": 4.597807331839229e-06, "logits/chosen": -1.5772576332092285, "logits/rejected": -1.1091996431350708, "logps/chosen": -627.5196533203125, "logps/rejected": -1284.9049072265625, "loss": 0.071, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1847463697195053, "rewards/margins": 0.2685849070549011, "rewards/rejected": -0.4533312916755676, "step": 1390 }, { "epoch": 0.27, "learning_rate": 4.588719528532342e-06, "logits/chosen": -1.6495682001113892, "logits/rejected": -1.0364662408828735, "logps/chosen": -684.3934326171875, "logps/rejected": -1204.6859130859375, "loss": 0.0848, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20700335502624512, "rewards/margins": 0.23912009596824646, "rewards/rejected": -0.4461234509944916, "step": 1400 }, { "epoch": 0.27, "learning_rate": 4.5795393884621735e-06, "logits/chosen": -1.829358696937561, "logits/rejected": -1.0703871250152588, "logps/chosen": -587.6880493164062, "logps/rejected": -1229.123291015625, "loss": 0.0504, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13737113773822784, "rewards/margins": 0.3153493106365204, "rewards/rejected": -0.4527204632759094, "step": 1410 }, { "epoch": 0.27, "learning_rate": 4.5702673174584236e-06, "logits/chosen": -1.5375653505325317, "logits/rejected": -1.086010456085205, "logps/chosen": -600.8570556640625, "logps/rejected": -1225.86962890625, "loss": 0.1055, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1756105124950409, "rewards/margins": 0.2715519964694977, "rewards/rejected": -0.44716253876686096, "step": 1420 }, { "epoch": 0.27, "learning_rate": 4.560903725414816e-06, "logits/chosen": -1.8195488452911377, "logits/rejected": -1.128953456878662, "logps/chosen": -594.7276611328125, "logps/rejected": -1118.1014404296875, "loss": 0.0733, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1236497312784195, "rewards/margins": 0.26376843452453613, "rewards/rejected": -0.3874182105064392, "step": 1430 }, { "epoch": 0.27, "learning_rate": 4.551449026270979e-06, "logits/chosen": -1.4914960861206055, "logits/rejected": -0.9787286520004272, "logps/chosen": -583.1992797851562, "logps/rejected": -1058.70068359375, "loss": 0.155, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13674364984035492, "rewards/margins": 0.1916511058807373, "rewards/rejected": -0.3283947706222534, "step": 1440 }, { "epoch": 0.28, "learning_rate": 4.541903637994142e-06, "logits/chosen": -1.7652862071990967, "logits/rejected": -1.3571741580963135, "logps/chosen": -633.3842163085938, "logps/rejected": -1270.315185546875, "loss": 0.0713, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22607281804084778, "rewards/margins": 0.3079972565174103, "rewards/rejected": -0.5340700745582581, "step": 1450 }, { "epoch": 0.28, "learning_rate": 4.532267982560662e-06, "logits/chosen": -1.750309705734253, "logits/rejected": -1.1464341878890991, "logps/chosen": -733.6136474609375, "logps/rejected": -1293.258544921875, "loss": 0.0863, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2598066031932831, "rewards/margins": 0.2805883288383484, "rewards/rejected": -0.5403949618339539, "step": 1460 }, { "epoch": 0.28, "learning_rate": 4.522542485937369e-06, "logits/chosen": -1.6304270029067993, "logits/rejected": -1.1222264766693115, "logps/chosen": -736.6213989257812, "logps/rejected": -1387.48876953125, "loss": 0.0862, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2621721923351288, "rewards/margins": 0.28952568769454956, "rewards/rejected": -0.5516979098320007, "step": 1470 }, { "epoch": 0.28, "learning_rate": 4.512727578062733e-06, "logits/chosen": -1.546983003616333, "logits/rejected": -1.1147258281707764, "logps/chosen": -720.029296875, "logps/rejected": -1342.5504150390625, "loss": 0.0939, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22671222686767578, "rewards/margins": 0.26174527406692505, "rewards/rejected": -0.48845750093460083, "step": 1480 }, { "epoch": 0.28, "learning_rate": 4.502823692827859e-06, "logits/chosen": -1.6339771747589111, "logits/rejected": -0.8447777628898621, "logps/chosen": -591.8370361328125, "logps/rejected": -1214.6041259765625, "loss": 0.0648, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13122068345546722, "rewards/margins": 0.29385435581207275, "rewards/rejected": -0.4250749945640564, "step": 1490 }, { "epoch": 0.29, "learning_rate": 4.492831268057307e-06, "logits/chosen": -1.5530567169189453, "logits/rejected": -1.39444899559021, "logps/chosen": -579.3165283203125, "logps/rejected": -1225.831298828125, "loss": 0.0804, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17402389645576477, "rewards/margins": 0.24063608050346375, "rewards/rejected": -0.4146599769592285, "step": 1500 }, { "epoch": 0.29, "learning_rate": 4.482750745489733e-06, "logits/chosen": -1.5297390222549438, "logits/rejected": -1.2879207134246826, "logps/chosen": -587.7469482421875, "logps/rejected": -1158.90283203125, "loss": 0.0884, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1852291375398636, "rewards/margins": 0.24651098251342773, "rewards/rejected": -0.4317401051521301, "step": 1510 }, { "epoch": 0.29, "learning_rate": 4.472582570758367e-06, "logits/chosen": -1.6017887592315674, "logits/rejected": -1.2658944129943848, "logps/chosen": -623.4454956054688, "logps/rejected": -1206.4208984375, "loss": 0.1038, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1995050460100174, "rewards/margins": 0.25444871187210083, "rewards/rejected": -0.4539538025856018, "step": 1520 }, { "epoch": 0.29, "learning_rate": 4.4623271933713065e-06, "logits/chosen": -1.792682409286499, "logits/rejected": -0.9171991348266602, "logps/chosen": -787.0244750976562, "logps/rejected": -1522.90185546875, "loss": 0.055, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.226217120885849, "rewards/margins": 0.340124249458313, "rewards/rejected": -0.5663414001464844, "step": 1530 }, { "epoch": 0.29, "learning_rate": 4.451985066691649e-06, "logits/chosen": -1.338421106338501, "logits/rejected": -0.9744964838027954, "logps/chosen": -611.8841552734375, "logps/rejected": -1370.3062744140625, "loss": 0.0597, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1973779797554016, "rewards/margins": 0.29491350054740906, "rewards/rejected": -0.49229151010513306, "step": 1540 }, { "epoch": 0.3, "learning_rate": 4.441556647917447e-06, "logits/chosen": -1.5752965211868286, "logits/rejected": -1.1147037744522095, "logps/chosen": -590.4030151367188, "logps/rejected": -1291.9884033203125, "loss": 0.0729, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15929007530212402, "rewards/margins": 0.3151244521141052, "rewards/rejected": -0.47441449761390686, "step": 1550 }, { "epoch": 0.3, "learning_rate": 4.431042398061499e-06, "logits/chosen": -1.5449073314666748, "logits/rejected": -0.9329763650894165, "logps/chosen": -652.1185302734375, "logps/rejected": -1286.4637451171875, "loss": 0.0574, "rewards/accuracies": 0.875, "rewards/chosen": -0.20002980530261993, "rewards/margins": 0.2901848256587982, "rewards/rejected": -0.49021464586257935, "step": 1560 }, { "epoch": 0.3, "learning_rate": 4.420442781930971e-06, "logits/chosen": -1.851266622543335, "logits/rejected": -1.2136085033416748, "logps/chosen": -671.2997436523438, "logps/rejected": -1213.92236328125, "loss": 0.0909, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.226095512509346, "rewards/margins": 0.27147597074508667, "rewards/rejected": -0.4975714683532715, "step": 1570 }, { "epoch": 0.3, "learning_rate": 4.409758268106842e-06, "logits/chosen": -1.6012693643569946, "logits/rejected": -1.1103239059448242, "logps/chosen": -609.9935302734375, "logps/rejected": -1086.2857666015625, "loss": 0.1148, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16556409001350403, "rewards/margins": 0.2218482941389084, "rewards/rejected": -0.3874123990535736, "step": 1580 }, { "epoch": 0.3, "learning_rate": 4.398989328923196e-06, "logits/chosen": -1.4557781219482422, "logits/rejected": -1.044471025466919, "logps/chosen": -617.2952880859375, "logps/rejected": -1107.7027587890625, "loss": 0.1086, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1871017962694168, "rewards/margins": 0.2236325442790985, "rewards/rejected": -0.4107343256473541, "step": 1590 }, { "epoch": 0.3, "learning_rate": 4.388136440446338e-06, "logits/chosen": -1.918044090270996, "logits/rejected": -1.3620249032974243, "logps/chosen": -643.0921630859375, "logps/rejected": -1395.455810546875, "loss": 0.0469, "rewards/accuracies": 0.875, "rewards/chosen": -0.15777665376663208, "rewards/margins": 0.32988977432250977, "rewards/rejected": -0.48766642808914185, "step": 1600 }, { "epoch": 0.31, "learning_rate": 4.377200082453748e-06, "logits/chosen": -1.5242416858673096, "logits/rejected": -1.0205128192901611, "logps/chosen": -644.7546997070312, "logps/rejected": -1393.016357421875, "loss": 0.0469, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.18372926115989685, "rewards/margins": 0.308932900428772, "rewards/rejected": -0.49266213178634644, "step": 1610 }, { "epoch": 0.31, "learning_rate": 4.366180738412876e-06, "logits/chosen": -1.5953199863433838, "logits/rejected": -1.0117969512939453, "logps/chosen": -865.4313354492188, "logps/rejected": -1511.0732421875, "loss": 0.0603, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.3143005073070526, "rewards/margins": 0.29054003953933716, "rewards/rejected": -0.6048405766487122, "step": 1620 }, { "epoch": 0.31, "learning_rate": 4.355078895459761e-06, "logits/chosen": -1.5991640090942383, "logits/rejected": -1.0942062139511108, "logps/chosen": -669.2398681640625, "logps/rejected": -1328.014404296875, "loss": 0.0759, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2610950469970703, "rewards/margins": 0.30377668142318726, "rewards/rejected": -0.5648717880249023, "step": 1630 }, { "epoch": 0.31, "learning_rate": 4.343895044377504e-06, "logits/chosen": -1.5109912157058716, "logits/rejected": -1.0822455883026123, "logps/chosen": -692.4320068359375, "logps/rejected": -1503.792236328125, "loss": 0.0686, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1996488720178604, "rewards/margins": 0.35995012521743774, "rewards/rejected": -0.5595989227294922, "step": 1640 }, { "epoch": 0.31, "learning_rate": 4.332629679574566e-06, "logits/chosen": -1.8324410915374756, "logits/rejected": -1.1902332305908203, "logps/chosen": -566.93603515625, "logps/rejected": -1152.119384765625, "loss": 0.0927, "rewards/accuracies": 0.875, "rewards/chosen": -0.08348747342824936, "rewards/margins": 0.2730974853038788, "rewards/rejected": -0.35658496618270874, "step": 1650 }, { "epoch": 0.32, "learning_rate": 4.321283299062916e-06, "logits/chosen": -1.6973434686660767, "logits/rejected": -1.0856391191482544, "logps/chosen": -560.3250122070312, "logps/rejected": -1092.025146484375, "loss": 0.0841, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.09070460498332977, "rewards/margins": 0.25446048378944397, "rewards/rejected": -0.34516507387161255, "step": 1660 }, { "epoch": 0.32, "learning_rate": 4.309856404436013e-06, "logits/chosen": -1.6770203113555908, "logits/rejected": -1.1826202869415283, "logps/chosen": -546.0051879882812, "logps/rejected": -1200.4798583984375, "loss": 0.0628, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.08990595489740372, "rewards/margins": 0.2899653911590576, "rewards/rejected": -0.3798713982105255, "step": 1670 }, { "epoch": 0.32, "learning_rate": 4.2983495008466285e-06, "logits/chosen": -1.7677803039550781, "logits/rejected": -1.1159054040908813, "logps/chosen": -704.0637817382812, "logps/rejected": -1257.2755126953125, "loss": 0.0765, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.12143462896347046, "rewards/margins": 0.2974852919578552, "rewards/rejected": -0.4189198911190033, "step": 1680 }, { "epoch": 0.32, "learning_rate": 4.2867630969845235e-06, "logits/chosen": -1.4960908889770508, "logits/rejected": -1.0935460329055786, "logps/chosen": -531.4020385742188, "logps/rejected": -1232.95166015625, "loss": 0.0676, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07758744060993195, "rewards/margins": 0.30141371488571167, "rewards/rejected": -0.3790012001991272, "step": 1690 }, { "epoch": 0.32, "learning_rate": 4.275097705053951e-06, "logits/chosen": -1.3998308181762695, "logits/rejected": -0.9511981010437012, "logps/chosen": -518.6180419921875, "logps/rejected": -1279.418701171875, "loss": 0.09, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11437869071960449, "rewards/margins": 0.31818702816963196, "rewards/rejected": -0.43256568908691406, "step": 1700 }, { "epoch": 0.33, "learning_rate": 4.263353840751023e-06, "logits/chosen": -1.6988664865493774, "logits/rejected": -1.3357477188110352, "logps/chosen": -619.7531127929688, "logps/rejected": -1278.7855224609375, "loss": 0.0881, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15841639041900635, "rewards/margins": 0.2583143413066864, "rewards/rejected": -0.41673073172569275, "step": 1710 }, { "epoch": 0.33, "learning_rate": 4.251532023240901e-06, "logits/chosen": -1.6375110149383545, "logits/rejected": -1.2145359516143799, "logps/chosen": -635.3146362304688, "logps/rejected": -1267.1383056640625, "loss": 0.094, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1674066036939621, "rewards/margins": 0.26885929703712463, "rewards/rejected": -0.43626588582992554, "step": 1720 }, { "epoch": 0.33, "learning_rate": 4.239632775134857e-06, "logits/chosen": -1.4710214138031006, "logits/rejected": -0.9805693626403809, "logps/chosen": -580.4301147460938, "logps/rejected": -1319.4400634765625, "loss": 0.0685, "rewards/accuracies": 0.875, "rewards/chosen": -0.1512683480978012, "rewards/margins": 0.29545730352401733, "rewards/rejected": -0.44672566652297974, "step": 1730 }, { "epoch": 0.33, "learning_rate": 4.227656622467162e-06, "logits/chosen": -1.745552659034729, "logits/rejected": -1.2884304523468018, "logps/chosen": -468.9326171875, "logps/rejected": -1042.484619140625, "loss": 0.0868, "rewards/accuracies": 0.75, "rewards/chosen": -0.07101878523826599, "rewards/margins": 0.23783501982688904, "rewards/rejected": -0.30885380506515503, "step": 1740 }, { "epoch": 0.33, "learning_rate": 4.215604094671835e-06, "logits/chosen": -1.6640018224716187, "logits/rejected": -0.9551274180412292, "logps/chosen": -606.9476318359375, "logps/rejected": -1325.010986328125, "loss": 0.0545, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.10354119539260864, "rewards/margins": 0.33232301473617554, "rewards/rejected": -0.4358642101287842, "step": 1750 }, { "epoch": 0.34, "learning_rate": 4.203475724559235e-06, "logits/chosen": -1.4627963304519653, "logits/rejected": -1.1045763492584229, "logps/chosen": -531.2335815429688, "logps/rejected": -1144.1085205078125, "loss": 0.0791, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13256287574768066, "rewards/margins": 0.2571134865283966, "rewards/rejected": -0.3896763324737549, "step": 1760 }, { "epoch": 0.34, "learning_rate": 4.191272048292514e-06, "logits/chosen": -1.7131484746932983, "logits/rejected": -1.199053406715393, "logps/chosen": -495.76568603515625, "logps/rejected": -1074.788818359375, "loss": 0.0709, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10537634044885635, "rewards/margins": 0.2483271062374115, "rewards/rejected": -0.35370343923568726, "step": 1770 }, { "epoch": 0.34, "learning_rate": 4.178993605363904e-06, "logits/chosen": -1.7240636348724365, "logits/rejected": -0.839851975440979, "logps/chosen": -684.6185302734375, "logps/rejected": -1250.919189453125, "loss": 0.0881, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1622859537601471, "rewards/margins": 0.2836538851261139, "rewards/rejected": -0.4459398686885834, "step": 1780 }, { "epoch": 0.34, "learning_rate": 4.166640938570879e-06, "logits/chosen": -1.6593945026397705, "logits/rejected": -0.8714178204536438, "logps/chosen": -645.1300048828125, "logps/rejected": -1271.795166015625, "loss": 0.0606, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11708177626132965, "rewards/margins": 0.3288155198097229, "rewards/rejected": -0.44589725136756897, "step": 1790 }, { "epoch": 0.34, "learning_rate": 4.154214593992149e-06, "logits/chosen": -1.490355134010315, "logits/rejected": -0.9209915399551392, "logps/chosen": -571.5134887695312, "logps/rejected": -1043.43798828125, "loss": 0.0922, "rewards/accuracies": 0.75, "rewards/chosen": -0.11676846444606781, "rewards/margins": 0.24484193325042725, "rewards/rejected": -0.36161044239997864, "step": 1800 }, { "epoch": 0.34, "learning_rate": 4.1417151209635265e-06, "logits/chosen": -1.558685064315796, "logits/rejected": -1.095737099647522, "logps/chosen": -625.5676879882812, "logps/rejected": -1183.7132568359375, "loss": 0.0791, "rewards/accuracies": 0.875, "rewards/chosen": -0.1728230118751526, "rewards/margins": 0.2501334547996521, "rewards/rejected": -0.4229564070701599, "step": 1810 }, { "epoch": 0.35, "learning_rate": 4.129143072053639e-06, "logits/chosen": -1.5196239948272705, "logits/rejected": -0.9427810907363892, "logps/chosen": -717.9283447265625, "logps/rejected": -1308.8529052734375, "loss": 0.0855, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19684462249279022, "rewards/margins": 0.2644059956073761, "rewards/rejected": -0.4612506330013275, "step": 1820 }, { "epoch": 0.35, "learning_rate": 4.116499003039499e-06, "logits/chosen": -1.5101161003112793, "logits/rejected": -1.0998636484146118, "logps/chosen": -608.6380615234375, "logps/rejected": -1254.279541015625, "loss": 0.0776, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15637364983558655, "rewards/margins": 0.2847002148628235, "rewards/rejected": -0.44107383489608765, "step": 1830 }, { "epoch": 0.35, "learning_rate": 4.103783472881942e-06, "logits/chosen": -1.4874773025512695, "logits/rejected": -0.9665767550468445, "logps/chosen": -609.3054809570312, "logps/rejected": -1297.626708984375, "loss": 0.0808, "rewards/accuracies": 0.875, "rewards/chosen": -0.1461195945739746, "rewards/margins": 0.28892236948013306, "rewards/rejected": -0.4350419044494629, "step": 1840 }, { "epoch": 0.35, "learning_rate": 4.0909970437009094e-06, "logits/chosen": -1.8195278644561768, "logits/rejected": -1.0763853788375854, "logps/chosen": -610.7991333007812, "logps/rejected": -1304.9736328125, "loss": 0.0679, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.13620565831661224, "rewards/margins": 0.3199845850467682, "rewards/rejected": -0.4561902582645416, "step": 1850 }, { "epoch": 0.35, "learning_rate": 4.078140280750598e-06, "logits/chosen": -1.6017236709594727, "logits/rejected": -1.0406373739242554, "logps/chosen": -633.7638549804688, "logps/rejected": -1135.013671875, "loss": 0.131, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1918884664773941, "rewards/margins": 0.23026582598686218, "rewards/rejected": -0.42215433716773987, "step": 1860 }, { "epoch": 0.36, "learning_rate": 4.065213752394478e-06, "logits/chosen": -1.5482735633850098, "logits/rejected": -0.8328466415405273, "logps/chosen": -705.640625, "logps/rejected": -1373.3658447265625, "loss": 0.0794, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21625776588916779, "rewards/margins": 0.29595327377319336, "rewards/rejected": -0.5122110247612, "step": 1870 }, { "epoch": 0.36, "learning_rate": 4.052218030080162e-06, "logits/chosen": -1.3692519664764404, "logits/rejected": -0.972745418548584, "logps/chosen": -606.39501953125, "logps/rejected": -1162.306640625, "loss": 0.0954, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20994558930397034, "rewards/margins": 0.2586881220340729, "rewards/rejected": -0.46863365173339844, "step": 1880 }, { "epoch": 0.36, "learning_rate": 4.039153688314146e-06, "logits/chosen": -1.484716773033142, "logits/rejected": -0.7318023443222046, "logps/chosen": -576.1600341796875, "logps/rejected": -1257.232666015625, "loss": 0.0475, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.10188218206167221, "rewards/margins": 0.3267883360385895, "rewards/rejected": -0.4286704659461975, "step": 1890 }, { "epoch": 0.36, "learning_rate": 4.026021304636408e-06, "logits/chosen": -1.5514335632324219, "logits/rejected": -0.8337615132331848, "logps/chosen": -563.2936401367188, "logps/rejected": -1291.733154296875, "loss": 0.0387, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1273859441280365, "rewards/margins": 0.3672037720680237, "rewards/rejected": -0.4945897161960602, "step": 1900 }, { "epoch": 0.36, "learning_rate": 4.012821459594881e-06, "logits/chosen": -1.5965911149978638, "logits/rejected": -0.8250566720962524, "logps/chosen": -533.0094604492188, "logps/rejected": -1278.1763916015625, "loss": 0.0492, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12176971137523651, "rewards/margins": 0.35596519708633423, "rewards/rejected": -0.47773489356040955, "step": 1910 }, { "epoch": 0.37, "learning_rate": 3.999554736719785e-06, "logits/chosen": -1.5723917484283447, "logits/rejected": -1.016540288925171, "logps/chosen": -640.8189697265625, "logps/rejected": -1321.976318359375, "loss": 0.0647, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1868075430393219, "rewards/margins": 0.2953697144985199, "rewards/rejected": -0.4821773171424866, "step": 1920 }, { "epoch": 0.37, "learning_rate": 3.986221722497832e-06, "logits/chosen": -1.5826764106750488, "logits/rejected": -0.9718710780143738, "logps/chosen": -663.3128051757812, "logps/rejected": -1344.0526123046875, "loss": 0.0816, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19100730121135712, "rewards/margins": 0.3145214021205902, "rewards/rejected": -0.5055286884307861, "step": 1930 }, { "epoch": 0.37, "learning_rate": 3.9728230063463e-06, "logits/chosen": -1.5128819942474365, "logits/rejected": -1.130190372467041, "logps/chosen": -516.5947265625, "logps/rejected": -1127.3963623046875, "loss": 0.0724, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15435567498207092, "rewards/margins": 0.2678048312664032, "rewards/rejected": -0.42216047644615173, "step": 1940 }, { "epoch": 0.37, "learning_rate": 3.9593591805869755e-06, "logits/chosen": -1.5318098068237305, "logits/rejected": -0.9274725914001465, "logps/chosen": -549.1895751953125, "logps/rejected": -1094.859130859375, "loss": 0.0795, "rewards/accuracies": 0.75, "rewards/chosen": -0.1255248486995697, "rewards/margins": 0.26419132947921753, "rewards/rejected": -0.38971614837646484, "step": 1950 }, { "epoch": 0.37, "learning_rate": 3.945830840419966e-06, "logits/chosen": -1.7480872869491577, "logits/rejected": -0.9923027753829956, "logps/chosen": -624.6234130859375, "logps/rejected": -1274.6939697265625, "loss": 0.0874, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14473918080329895, "rewards/margins": 0.3186899721622467, "rewards/rejected": -0.46342915296554565, "step": 1960 }, { "epoch": 0.38, "learning_rate": 3.932238583897395e-06, "logits/chosen": -1.560272455215454, "logits/rejected": -0.9629675149917603, "logps/chosen": -711.4615478515625, "logps/rejected": -1165.3927001953125, "loss": 0.1287, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20542404055595398, "rewards/margins": 0.2213752716779709, "rewards/rejected": -0.42679935693740845, "step": 1970 }, { "epoch": 0.38, "learning_rate": 3.918583011896955e-06, "logits/chosen": -1.5640978813171387, "logits/rejected": -1.0945550203323364, "logps/chosen": -573.6983032226562, "logps/rejected": -1243.242431640625, "loss": 0.0838, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.12780186533927917, "rewards/margins": 0.28756803274154663, "rewards/rejected": -0.4153698980808258, "step": 1980 }, { "epoch": 0.38, "learning_rate": 3.904864728095349e-06, "logits/chosen": -1.6314102411270142, "logits/rejected": -1.1875035762786865, "logps/chosen": -564.3800659179688, "logps/rejected": -1137.12744140625, "loss": 0.078, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07501459121704102, "rewards/margins": 0.2666712701320648, "rewards/rejected": -0.34168586134910583, "step": 1990 }, { "epoch": 0.38, "learning_rate": 3.891084338941603e-06, "logits/chosen": -1.6454538106918335, "logits/rejected": -0.9986695051193237, "logps/chosen": -591.3613891601562, "logps/rejected": -1145.727783203125, "loss": 0.0738, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09405544400215149, "rewards/margins": 0.28904372453689575, "rewards/rejected": -0.38309913873672485, "step": 2000 }, { "epoch": 0.38, "learning_rate": 3.8772424536302565e-06, "logits/chosen": -1.5474951267242432, "logits/rejected": -0.8243762850761414, "logps/chosen": -535.3194580078125, "logps/rejected": -1299.9228515625, "loss": 0.065, "rewards/accuracies": 0.875, "rewards/chosen": -0.10869929939508438, "rewards/margins": 0.3450026512145996, "rewards/rejected": -0.453701913356781, "step": 2010 }, { "epoch": 0.38, "learning_rate": 3.863339684074432e-06, "logits/chosen": -1.6806402206420898, "logits/rejected": -1.0863831043243408, "logps/chosen": -759.7283935546875, "logps/rejected": -1388.066162109375, "loss": 0.0695, "rewards/accuracies": 0.875, "rewards/chosen": -0.2040364295244217, "rewards/margins": 0.2842352092266083, "rewards/rejected": -0.48827165365219116, "step": 2020 }, { "epoch": 0.39, "learning_rate": 3.849376644878783e-06, "logits/chosen": -1.3155741691589355, "logits/rejected": -1.046635627746582, "logps/chosen": -516.9121704101562, "logps/rejected": -1351.8599853515625, "loss": 0.0422, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.12792593240737915, "rewards/margins": 0.3376065790653229, "rewards/rejected": -0.4655325412750244, "step": 2030 }, { "epoch": 0.39, "learning_rate": 3.835353953312322e-06, "logits/chosen": -1.5681509971618652, "logits/rejected": -1.044856071472168, "logps/chosen": -618.40576171875, "logps/rejected": -1278.9957275390625, "loss": 0.0497, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.10632064193487167, "rewards/margins": 0.31042590737342834, "rewards/rejected": -0.4167465269565582, "step": 2040 }, { "epoch": 0.39, "learning_rate": 3.821272229281139e-06, "logits/chosen": -1.7507911920547485, "logits/rejected": -1.2355482578277588, "logps/chosen": -502.3592224121094, "logps/rejected": -1111.670654296875, "loss": 0.1022, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0735398381948471, "rewards/margins": 0.29525768756866455, "rewards/rejected": -0.36879754066467285, "step": 2050 }, { "epoch": 0.39, "learning_rate": 3.8071320953009906e-06, "logits/chosen": -1.7723318338394165, "logits/rejected": -0.853528618812561, "logps/chosen": -646.585205078125, "logps/rejected": -1187.48291015625, "loss": 0.1126, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.11552548408508301, "rewards/margins": 0.2491040676832199, "rewards/rejected": -0.3646295666694641, "step": 2060 }, { "epoch": 0.39, "learning_rate": 3.792934176469782e-06, "logits/chosen": -1.5255523920059204, "logits/rejected": -1.0040549039840698, "logps/chosen": -517.2431640625, "logps/rejected": -1240.179931640625, "loss": 0.0637, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.12137254327535629, "rewards/margins": 0.30170968174934387, "rewards/rejected": -0.42308226227760315, "step": 2070 }, { "epoch": 0.4, "learning_rate": 3.7786791004399353e-06, "logits/chosen": -1.5424957275390625, "logits/rejected": -0.9886695742607117, "logps/chosen": -652.4849243164062, "logps/rejected": -1332.295654296875, "loss": 0.0549, "rewards/accuracies": 0.875, "rewards/chosen": -0.16771355271339417, "rewards/margins": 0.30935198068618774, "rewards/rejected": -0.4770655035972595, "step": 2080 }, { "epoch": 0.4, "learning_rate": 3.764367497390642e-06, "logits/chosen": -1.5075163841247559, "logits/rejected": -0.9398325681686401, "logps/chosen": -762.2354125976562, "logps/rejected": -1400.30126953125, "loss": 0.0722, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.22768834233283997, "rewards/margins": 0.2723497748374939, "rewards/rejected": -0.5000380873680115, "step": 2090 }, { "epoch": 0.4, "learning_rate": 3.7500000000000005e-06, "logits/chosen": -1.7654527425765991, "logits/rejected": -0.8684778213500977, "logps/chosen": -616.1461181640625, "logps/rejected": -1208.4818115234375, "loss": 0.0745, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15020321309566498, "rewards/margins": 0.31690362095832825, "rewards/rejected": -0.46710681915283203, "step": 2100 }, { "epoch": 0.4, "learning_rate": 3.7355772434170523e-06, "logits/chosen": -1.4072939157485962, "logits/rejected": -0.8714143633842468, "logps/chosen": -672.609375, "logps/rejected": -1258.9796142578125, "loss": 0.0808, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19290420413017273, "rewards/margins": 0.28400301933288574, "rewards/rejected": -0.4769071936607361, "step": 2110 }, { "epoch": 0.4, "learning_rate": 3.7210998652337016e-06, "logits/chosen": -1.7671144008636475, "logits/rejected": -1.125576376914978, "logps/chosen": -663.3321533203125, "logps/rejected": -1265.360595703125, "loss": 0.1121, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1786895990371704, "rewards/margins": 0.2822434902191162, "rewards/rejected": -0.46093305945396423, "step": 2120 }, { "epoch": 0.41, "learning_rate": 3.7065685054565277e-06, "logits/chosen": -1.755200743675232, "logits/rejected": -0.9497678875923157, "logps/chosen": -624.0568237304688, "logps/rejected": -1243.8646240234375, "loss": 0.0712, "rewards/accuracies": 0.75, "rewards/chosen": -0.1317531168460846, "rewards/margins": 0.28538697957992554, "rewards/rejected": -0.41714009642601013, "step": 2130 }, { "epoch": 0.41, "learning_rate": 3.691983806478494e-06, "logits/chosen": -1.3938941955566406, "logits/rejected": -0.9991312026977539, "logps/chosen": -609.2355346679688, "logps/rejected": -1163.9844970703125, "loss": 0.1083, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1537613570690155, "rewards/margins": 0.22227540612220764, "rewards/rejected": -0.37603679299354553, "step": 2140 }, { "epoch": 0.41, "learning_rate": 3.677346413050551e-06, "logits/chosen": -1.7743680477142334, "logits/rejected": -0.8094174265861511, "logps/chosen": -645.5819091796875, "logps/rejected": -1326.28369140625, "loss": 0.0626, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17391979694366455, "rewards/margins": 0.29295578598976135, "rewards/rejected": -0.4668755531311035, "step": 2150 }, { "epoch": 0.41, "learning_rate": 3.6626569722531268e-06, "logits/chosen": -1.8744360208511353, "logits/rejected": -1.0872384309768677, "logps/chosen": -745.5087890625, "logps/rejected": -1093.416748046875, "loss": 0.1225, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2028745710849762, "rewards/margins": 0.18700726330280304, "rewards/rejected": -0.38988178968429565, "step": 2160 }, { "epoch": 0.41, "learning_rate": 3.6479161334675294e-06, "logits/chosen": -1.374147653579712, "logits/rejected": -0.8410941362380981, "logps/chosen": -594.9989624023438, "logps/rejected": -1234.977294921875, "loss": 0.0598, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.11717710644006729, "rewards/margins": 0.306627482175827, "rewards/rejected": -0.4238045811653137, "step": 2170 }, { "epoch": 0.42, "learning_rate": 3.6331245483472353e-06, "logits/chosen": -1.2791547775268555, "logits/rejected": -1.0032517910003662, "logps/chosen": -657.0687255859375, "logps/rejected": -1368.2298583984375, "loss": 0.0674, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18623146414756775, "rewards/margins": 0.30436015129089355, "rewards/rejected": -0.4905916750431061, "step": 2180 }, { "epoch": 0.42, "learning_rate": 3.6182828707890816e-06, "logits/chosen": -1.5984619855880737, "logits/rejected": -1.1988598108291626, "logps/chosen": -685.6885375976562, "logps/rejected": -1425.386474609375, "loss": 0.0668, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.204415962100029, "rewards/margins": 0.3183789551258087, "rewards/rejected": -0.5227949023246765, "step": 2190 }, { "epoch": 0.42, "learning_rate": 3.6033917569043604e-06, "logits/chosen": -1.4690712690353394, "logits/rejected": -1.0461918115615845, "logps/chosen": -603.8146362304688, "logps/rejected": -1344.2086181640625, "loss": 0.066, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21491065621376038, "rewards/margins": 0.3031674027442932, "rewards/rejected": -0.5180780291557312, "step": 2200 }, { "epoch": 0.42, "learning_rate": 3.588451864989811e-06, "logits/chosen": -1.5433639287948608, "logits/rejected": -1.1572163105010986, "logps/chosen": -604.7794189453125, "logps/rejected": -1369.9986572265625, "loss": 0.0576, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17777907848358154, "rewards/margins": 0.33150824904441833, "rewards/rejected": -0.5092872977256775, "step": 2210 }, { "epoch": 0.42, "learning_rate": 3.5734638554985234e-06, "logits/chosen": -1.6556087732315063, "logits/rejected": -0.890389084815979, "logps/chosen": -683.4158935546875, "logps/rejected": -1303.145263671875, "loss": 0.0646, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18300040066242218, "rewards/margins": 0.3182368576526642, "rewards/rejected": -0.5012372136116028, "step": 2220 }, { "epoch": 0.42, "learning_rate": 3.5584283910107343e-06, "logits/chosen": -1.6998045444488525, "logits/rejected": -0.9639657139778137, "logps/chosen": -666.9964599609375, "logps/rejected": -1350.66357421875, "loss": 0.0643, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1677488386631012, "rewards/margins": 0.34021884202957153, "rewards/rejected": -0.5079677104949951, "step": 2230 }, { "epoch": 0.43, "learning_rate": 3.543346136204545e-06, "logits/chosen": -1.533036231994629, "logits/rejected": -1.0550358295440674, "logps/chosen": -673.4791259765625, "logps/rejected": -1482.1470947265625, "loss": 0.0514, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1976957768201828, "rewards/margins": 0.3370071351528168, "rewards/rejected": -0.5347028970718384, "step": 2240 }, { "epoch": 0.43, "learning_rate": 3.5282177578265295e-06, "logits/chosen": -1.7097774744033813, "logits/rejected": -1.0682802200317383, "logps/chosen": -583.5718994140625, "logps/rejected": -1295.059326171875, "loss": 0.0591, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14968575537204742, "rewards/margins": 0.3431374430656433, "rewards/rejected": -0.49282321333885193, "step": 2250 }, { "epoch": 0.43, "learning_rate": 3.5130439246622635e-06, "logits/chosen": -1.3002750873565674, "logits/rejected": -0.9815470576286316, "logps/chosen": -614.1593017578125, "logps/rejected": -1260.708740234375, "loss": 0.0932, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1836967170238495, "rewards/margins": 0.3018070459365845, "rewards/rejected": -0.4855037331581116, "step": 2260 }, { "epoch": 0.43, "learning_rate": 3.497825307506758e-06, "logits/chosen": -1.7087440490722656, "logits/rejected": -0.9878816604614258, "logps/chosen": -652.6859741210938, "logps/rejected": -1311.63134765625, "loss": 0.0807, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19891947507858276, "rewards/margins": 0.2804645597934723, "rewards/rejected": -0.47938403487205505, "step": 2270 }, { "epoch": 0.43, "learning_rate": 3.4825625791348093e-06, "logits/chosen": -1.4375241994857788, "logits/rejected": -0.8479606509208679, "logps/chosen": -644.3697509765625, "logps/rejected": -1250.4058837890625, "loss": 0.0716, "rewards/accuracies": 0.875, "rewards/chosen": -0.1917545348405838, "rewards/margins": 0.27922800183296204, "rewards/rejected": -0.47098255157470703, "step": 2280 }, { "epoch": 0.44, "learning_rate": 3.467256414271249e-06, "logits/chosen": -1.6169170141220093, "logits/rejected": -0.7455465197563171, "logps/chosen": -713.6448364257812, "logps/rejected": -1382.803955078125, "loss": 0.0291, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.18694797158241272, "rewards/margins": 0.3667767643928528, "rewards/rejected": -0.5537247061729431, "step": 2290 }, { "epoch": 0.44, "learning_rate": 3.4519074895611245e-06, "logits/chosen": -1.3121281862258911, "logits/rejected": -0.8998733758926392, "logps/chosen": -698.1446533203125, "logps/rejected": -1322.88427734375, "loss": 0.0839, "rewards/accuracies": 0.875, "rewards/chosen": -0.2285010814666748, "rewards/margins": 0.3055169880390167, "rewards/rejected": -0.5340181589126587, "step": 2300 }, { "epoch": 0.44, "learning_rate": 3.436516483539781e-06, "logits/chosen": -1.8377107381820679, "logits/rejected": -1.1042895317077637, "logps/chosen": -671.3965454101562, "logps/rejected": -1484.2662353515625, "loss": 0.0507, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.15971992909908295, "rewards/margins": 0.37924593687057495, "rewards/rejected": -0.5389658212661743, "step": 2310 }, { "epoch": 0.44, "learning_rate": 3.421084076602867e-06, "logits/chosen": -1.4560115337371826, "logits/rejected": -1.019805669784546, "logps/chosen": -437.4620666503906, "logps/rejected": -1163.4324951171875, "loss": 0.1034, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11113061010837555, "rewards/margins": 0.3120535910129547, "rewards/rejected": -0.42318421602249146, "step": 2320 }, { "epoch": 0.44, "learning_rate": 3.405610950976257e-06, "logits/chosen": -1.5794904232025146, "logits/rejected": -0.8965857625007629, "logps/chosen": -640.6691284179688, "logps/rejected": -1358.776611328125, "loss": 0.0541, "rewards/accuracies": 0.875, "rewards/chosen": -0.14796991646289825, "rewards/margins": 0.3194065988063812, "rewards/rejected": -0.4673764705657959, "step": 2330 }, { "epoch": 0.45, "learning_rate": 3.3900977906858923e-06, "logits/chosen": -1.5456417798995972, "logits/rejected": -0.9718359112739563, "logps/chosen": -615.3480834960938, "logps/rejected": -1091.6168212890625, "loss": 0.1036, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17209769785404205, "rewards/margins": 0.21953482925891876, "rewards/rejected": -0.3916325271129608, "step": 2340 }, { "epoch": 0.45, "learning_rate": 3.3745452815275375e-06, "logits/chosen": -1.7035932540893555, "logits/rejected": -0.9176031947135925, "logps/chosen": -767.6522827148438, "logps/rejected": -1360.09619140625, "loss": 0.076, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1860385239124298, "rewards/margins": 0.3071725368499756, "rewards/rejected": -0.49321097135543823, "step": 2350 }, { "epoch": 0.45, "learning_rate": 3.3589541110364678e-06, "logits/chosen": -1.5451462268829346, "logits/rejected": -0.9727177619934082, "logps/chosen": -720.4700927734375, "logps/rejected": -1452.360107421875, "loss": 0.0443, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21592077612876892, "rewards/margins": 0.33422279357910156, "rewards/rejected": -0.5501435995101929, "step": 2360 }, { "epoch": 0.45, "learning_rate": 3.3433249684570757e-06, "logits/chosen": -1.5172083377838135, "logits/rejected": -0.8823717832565308, "logps/chosen": -560.3057861328125, "logps/rejected": -1079.5458984375, "loss": 0.0965, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15911349654197693, "rewards/margins": 0.2574231028556824, "rewards/rejected": -0.4165366590023041, "step": 2370 }, { "epoch": 0.45, "learning_rate": 3.3276585447123957e-06, "logits/chosen": -1.3937236070632935, "logits/rejected": -0.9429960250854492, "logps/chosen": -602.1780395507812, "logps/rejected": -1398.97998046875, "loss": 0.0482, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21115748584270477, "rewards/margins": 0.33564311265945435, "rewards/rejected": -0.5468006134033203, "step": 2380 }, { "epoch": 0.46, "learning_rate": 3.3119555323735664e-06, "logits/chosen": -1.4655485153198242, "logits/rejected": -0.8040722608566284, "logps/chosen": -668.6180419921875, "logps/rejected": -1413.5433349609375, "loss": 0.056, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.22061499953269958, "rewards/margins": 0.3491475582122803, "rewards/rejected": -0.5697625279426575, "step": 2390 }, { "epoch": 0.46, "learning_rate": 3.2962166256292116e-06, "logits/chosen": -1.4201056957244873, "logits/rejected": -0.9808454513549805, "logps/chosen": -639.6843872070312, "logps/rejected": -1323.3402099609375, "loss": 0.0663, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18054111301898956, "rewards/margins": 0.3211413323879242, "rewards/rejected": -0.501682460308075, "step": 2400 }, { "epoch": 0.46, "learning_rate": 3.2804425202547494e-06, "logits/chosen": -1.8520081043243408, "logits/rejected": -1.2786279916763306, "logps/chosen": -660.5281982421875, "logps/rejected": -1369.4189453125, "loss": 0.0553, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1543307602405548, "rewards/margins": 0.35364800691604614, "rewards/rejected": -0.5079787969589233, "step": 2410 }, { "epoch": 0.46, "learning_rate": 3.2646339135816386e-06, "logits/chosen": -1.6647218465805054, "logits/rejected": -1.114368200302124, "logps/chosen": -613.7562255859375, "logps/rejected": -1222.032958984375, "loss": 0.0533, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13925352692604065, "rewards/margins": 0.30394676327705383, "rewards/rejected": -0.4432002902030945, "step": 2420 }, { "epoch": 0.46, "learning_rate": 3.2487915044665485e-06, "logits/chosen": -1.4685899019241333, "logits/rejected": -1.0434587001800537, "logps/chosen": -553.0774536132812, "logps/rejected": -1291.67529296875, "loss": 0.067, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1718519628047943, "rewards/margins": 0.34229570627212524, "rewards/rejected": -0.5141476392745972, "step": 2430 }, { "epoch": 0.46, "learning_rate": 3.2329159932604638e-06, "logits/chosen": -1.6335970163345337, "logits/rejected": -1.0629937648773193, "logps/chosen": -600.8878173828125, "logps/rejected": -1243.2655029296875, "loss": 0.0684, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18541783094406128, "rewards/margins": 0.29092368483543396, "rewards/rejected": -0.4763415455818176, "step": 2440 }, { "epoch": 0.47, "learning_rate": 3.217008081777726e-06, "logits/chosen": -1.2788913249969482, "logits/rejected": -0.8486631512641907, "logps/chosen": -523.6038818359375, "logps/rejected": -1205.5172119140625, "loss": 0.0905, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17843207716941833, "rewards/margins": 0.2737273573875427, "rewards/rejected": -0.45215946435928345, "step": 2450 }, { "epoch": 0.47, "learning_rate": 3.201068473265007e-06, "logits/chosen": -1.6170251369476318, "logits/rejected": -0.9501678347587585, "logps/chosen": -626.2254638671875, "logps/rejected": -1324.7999267578125, "loss": 0.0563, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.1649116575717926, "rewards/margins": 0.324629008769989, "rewards/rejected": -0.4895406663417816, "step": 2460 }, { "epoch": 0.47, "learning_rate": 3.1850978723702213e-06, "logits/chosen": -1.531653642654419, "logits/rejected": -0.844316840171814, "logps/chosen": -622.3458862304688, "logps/rejected": -1382.773193359375, "loss": 0.0393, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15444083511829376, "rewards/margins": 0.37400901317596436, "rewards/rejected": -0.5284498929977417, "step": 2470 }, { "epoch": 0.47, "learning_rate": 3.1690969851113724e-06, "logits/chosen": -1.6974732875823975, "logits/rejected": -1.165449619293213, "logps/chosen": -709.1497802734375, "logps/rejected": -1315.007568359375, "loss": 0.0825, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18818387389183044, "rewards/margins": 0.3215584456920624, "rewards/rejected": -0.5097423195838928, "step": 2480 }, { "epoch": 0.47, "learning_rate": 3.1530665188453463e-06, "logits/chosen": -1.3408528566360474, "logits/rejected": -1.0513417720794678, "logps/chosen": -562.9158325195312, "logps/rejected": -1255.3634033203125, "loss": 0.0658, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16134487092494965, "rewards/margins": 0.29531365633010864, "rewards/rejected": -0.4566585123538971, "step": 2490 }, { "epoch": 0.48, "learning_rate": 3.137007182236637e-06, "logits/chosen": -1.8058862686157227, "logits/rejected": -1.1288154125213623, "logps/chosen": -730.669189453125, "logps/rejected": -1342.0849609375, "loss": 0.0575, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1855761706829071, "rewards/margins": 0.3109329044818878, "rewards/rejected": -0.4965090751647949, "step": 2500 }, { "epoch": 0.48, "learning_rate": 3.1209196852260204e-06, "logits/chosen": -1.762399673461914, "logits/rejected": -1.2845382690429688, "logps/chosen": -559.9722900390625, "logps/rejected": -1243.078125, "loss": 0.0805, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1693984866142273, "rewards/margins": 0.28685134649276733, "rewards/rejected": -0.456249862909317, "step": 2510 }, { "epoch": 0.48, "learning_rate": 3.1048047389991693e-06, "logits/chosen": -1.5080870389938354, "logits/rejected": -1.1010701656341553, "logps/chosen": -727.5679931640625, "logps/rejected": -1342.488525390625, "loss": 0.0848, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2452806681394577, "rewards/margins": 0.27895987033843994, "rewards/rejected": -0.5242404937744141, "step": 2520 }, { "epoch": 0.48, "learning_rate": 3.0886630559552144e-06, "logits/chosen": -1.7736622095108032, "logits/rejected": -0.8815032243728638, "logps/chosen": -762.474609375, "logps/rejected": -1368.76171875, "loss": 0.0778, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.25478002429008484, "rewards/margins": 0.3027140200138092, "rewards/rejected": -0.557494044303894, "step": 2530 }, { "epoch": 0.48, "learning_rate": 3.072495349675249e-06, "logits/chosen": -1.6037559509277344, "logits/rejected": -1.0513917207717896, "logps/chosen": -613.7960205078125, "logps/rejected": -1140.569580078125, "loss": 0.0906, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19967949390411377, "rewards/margins": 0.2505190968513489, "rewards/rejected": -0.45019856095314026, "step": 2540 }, { "epoch": 0.49, "learning_rate": 3.056302334890786e-06, "logits/chosen": -1.7416324615478516, "logits/rejected": -1.020509123802185, "logps/chosen": -886.8963012695312, "logps/rejected": -1471.7713623046875, "loss": 0.0564, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2609700560569763, "rewards/margins": 0.3177827000617981, "rewards/rejected": -0.5787526965141296, "step": 2550 }, { "epoch": 0.49, "learning_rate": 3.04008472745216e-06, "logits/chosen": -1.6769838333129883, "logits/rejected": -1.1075221300125122, "logps/chosen": -549.7738037109375, "logps/rejected": -1215.347900390625, "loss": 0.0675, "rewards/accuracies": 0.75, "rewards/chosen": -0.17719343304634094, "rewards/margins": 0.3189665973186493, "rewards/rejected": -0.496160089969635, "step": 2560 }, { "epoch": 0.49, "learning_rate": 3.0238432442968803e-06, "logits/chosen": -1.4335027933120728, "logits/rejected": -0.8432362675666809, "logps/chosen": -626.7779541015625, "logps/rejected": -1301.3828125, "loss": 0.0702, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18088267743587494, "rewards/margins": 0.3284760117530823, "rewards/rejected": -0.5093587040901184, "step": 2570 }, { "epoch": 0.49, "learning_rate": 3.0075786034179407e-06, "logits/chosen": -1.543150782585144, "logits/rejected": -1.0602935552597046, "logps/chosen": -750.2728271484375, "logps/rejected": -1355.26220703125, "loss": 0.083, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22942061722278595, "rewards/margins": 0.2801212668418884, "rewards/rejected": -0.5095418691635132, "step": 2580 }, { "epoch": 0.49, "learning_rate": 2.9912915238320755e-06, "logits/chosen": -1.5501201152801514, "logits/rejected": -1.0371865034103394, "logps/chosen": -632.9366455078125, "logps/rejected": -1157.901611328125, "loss": 0.0942, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18720534443855286, "rewards/margins": 0.22448968887329102, "rewards/rejected": -0.4116950035095215, "step": 2590 }, { "epoch": 0.5, "learning_rate": 2.974982725547976e-06, "logits/chosen": -1.6694200038909912, "logits/rejected": -1.1160310506820679, "logps/chosen": -546.0211181640625, "logps/rejected": -1163.3369140625, "loss": 0.078, "rewards/accuracies": 0.875, "rewards/chosen": -0.130954772233963, "rewards/margins": 0.27275922894477844, "rewards/rejected": -0.40371403098106384, "step": 2600 }, { "epoch": 0.5, "learning_rate": 2.958652929534456e-06, "logits/chosen": -1.367626428604126, "logits/rejected": -0.8243290185928345, "logps/chosen": -595.7312622070312, "logps/rejected": -1166.314208984375, "loss": 0.0693, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13953298330307007, "rewards/margins": 0.27511560916900635, "rewards/rejected": -0.4146485924720764, "step": 2610 }, { "epoch": 0.5, "learning_rate": 2.9423028576885894e-06, "logits/chosen": -1.763495683670044, "logits/rejected": -0.7965998649597168, "logps/chosen": -688.7870483398438, "logps/rejected": -1430.501708984375, "loss": 0.0466, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.20105794072151184, "rewards/margins": 0.35635849833488464, "rewards/rejected": -0.5574164390563965, "step": 2620 }, { "epoch": 0.5, "learning_rate": 2.9259332328037852e-06, "logits/chosen": -1.805696725845337, "logits/rejected": -1.4092562198638916, "logps/chosen": -604.6575927734375, "logps/rejected": -1143.622802734375, "loss": 0.1062, "rewards/accuracies": 0.75, "rewards/chosen": -0.16817395389080048, "rewards/margins": 0.23777250945568085, "rewards/rejected": -0.40594643354415894, "step": 2630 }, { "epoch": 0.5, "learning_rate": 2.9095447785378446e-06, "logits/chosen": -1.7092657089233398, "logits/rejected": -1.1881816387176514, "logps/chosen": -655.2656860351562, "logps/rejected": -1103.6141357421875, "loss": 0.1183, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15867792069911957, "rewards/margins": 0.23788101971149445, "rewards/rejected": -0.396558940410614, "step": 2640 }, { "epoch": 0.5, "learning_rate": 2.893138219380964e-06, "logits/chosen": -1.615456223487854, "logits/rejected": -1.0051472187042236, "logps/chosen": -632.4168090820312, "logps/rejected": -1268.873291015625, "loss": 0.0784, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.179777592420578, "rewards/margins": 0.28583192825317383, "rewards/rejected": -0.46560949087142944, "step": 2650 }, { "epoch": 0.51, "learning_rate": 2.876714280623708e-06, "logits/chosen": -1.5862231254577637, "logits/rejected": -1.2923696041107178, "logps/chosen": -566.974365234375, "logps/rejected": -1187.7386474609375, "loss": 0.0914, "rewards/accuracies": 0.75, "rewards/chosen": -0.1301722228527069, "rewards/margins": 0.2452360838651657, "rewards/rejected": -0.3754083216190338, "step": 2660 }, { "epoch": 0.51, "learning_rate": 2.8602736883249504e-06, "logits/chosen": -1.5150556564331055, "logits/rejected": -1.1209475994110107, "logps/chosen": -525.2214965820312, "logps/rejected": -1281.676025390625, "loss": 0.0384, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.10434702783823013, "rewards/margins": 0.3269978165626526, "rewards/rejected": -0.4313448369503021, "step": 2670 }, { "epoch": 0.51, "learning_rate": 2.843817169279772e-06, "logits/chosen": -1.8272594213485718, "logits/rejected": -1.1217495203018188, "logps/chosen": -626.3189086914062, "logps/rejected": -1287.4119873046875, "loss": 0.0571, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.13381603360176086, "rewards/margins": 0.3332836627960205, "rewards/rejected": -0.467099666595459, "step": 2680 }, { "epoch": 0.51, "learning_rate": 2.8273454509873333e-06, "logits/chosen": -1.6995046138763428, "logits/rejected": -0.9833188056945801, "logps/chosen": -723.0067138671875, "logps/rejected": -1212.660400390625, "loss": 0.0937, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18251793086528778, "rewards/margins": 0.24979586899280548, "rewards/rejected": -0.4323137700557709, "step": 2690 }, { "epoch": 0.51, "learning_rate": 2.8108592616187135e-06, "logits/chosen": -1.8046150207519531, "logits/rejected": -0.830971896648407, "logps/chosen": -650.2740478515625, "logps/rejected": -1255.9827880859375, "loss": 0.0641, "rewards/accuracies": 0.875, "rewards/chosen": -0.11181609332561493, "rewards/margins": 0.33334630727767944, "rewards/rejected": -0.44516244530677795, "step": 2700 }, { "epoch": 0.52, "learning_rate": 2.7943593299847186e-06, "logits/chosen": -1.8495075702667236, "logits/rejected": -1.0969774723052979, "logps/chosen": -656.57080078125, "logps/rejected": -1230.971435546875, "loss": 0.0643, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15315493941307068, "rewards/margins": 0.26907438039779663, "rewards/rejected": -0.4222293496131897, "step": 2710 }, { "epoch": 0.52, "learning_rate": 2.7778463855036656e-06, "logits/chosen": -1.736000418663025, "logits/rejected": -0.9863492250442505, "logps/chosen": -610.7926025390625, "logps/rejected": -1202.56787109375, "loss": 0.068, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14737586677074432, "rewards/margins": 0.28114116191864014, "rewards/rejected": -0.42851710319519043, "step": 2720 }, { "epoch": 0.52, "learning_rate": 2.761321158169134e-06, "logits/chosen": -1.6639608144760132, "logits/rejected": -1.19706130027771, "logps/chosen": -753.8912963867188, "logps/rejected": -1248.675537109375, "loss": 0.0972, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24665312469005585, "rewards/margins": 0.2453705072402954, "rewards/rejected": -0.49202364683151245, "step": 2730 }, { "epoch": 0.52, "learning_rate": 2.7447843785176958e-06, "logits/chosen": -1.2429653406143188, "logits/rejected": -0.708914577960968, "logps/chosen": -684.2418212890625, "logps/rejected": -1234.620849609375, "loss": 0.0989, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22040219604969025, "rewards/margins": 0.2539559006690979, "rewards/rejected": -0.47435808181762695, "step": 2740 }, { "epoch": 0.52, "learning_rate": 2.728236777596621e-06, "logits/chosen": -1.6828727722167969, "logits/rejected": -1.1341021060943604, "logps/chosen": -748.3656616210938, "logps/rejected": -1400.454345703125, "loss": 0.0757, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.24052861332893372, "rewards/margins": 0.3166273236274719, "rewards/rejected": -0.5571559071540833, "step": 2750 }, { "epoch": 0.53, "learning_rate": 2.7116790869315583e-06, "logits/chosen": -1.5554356575012207, "logits/rejected": -1.296067237854004, "logps/chosen": -595.21337890625, "logps/rejected": -1285.7225341796875, "loss": 0.0717, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18583238124847412, "rewards/margins": 0.3038370609283447, "rewards/rejected": -0.48966941237449646, "step": 2760 }, { "epoch": 0.53, "learning_rate": 2.695112038494198e-06, "logits/chosen": -1.653363823890686, "logits/rejected": -0.8572921752929688, "logps/chosen": -770.357666015625, "logps/rejected": -1271.4033203125, "loss": 0.0848, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1875109076499939, "rewards/margins": 0.29938197135925293, "rewards/rejected": -0.48689284920692444, "step": 2770 }, { "epoch": 0.53, "learning_rate": 2.6785363646699125e-06, "logits/chosen": -1.532932162284851, "logits/rejected": -1.014031171798706, "logps/chosen": -666.0637817382812, "logps/rejected": -1335.8193359375, "loss": 0.0647, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19544604420661926, "rewards/margins": 0.2952579855918884, "rewards/rejected": -0.4907039999961853, "step": 2780 }, { "epoch": 0.53, "learning_rate": 2.6619527982253796e-06, "logits/chosen": -1.6043936014175415, "logits/rejected": -1.032204031944275, "logps/chosen": -643.2554321289062, "logps/rejected": -1290.070068359375, "loss": 0.0661, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.21375803649425507, "rewards/margins": 0.2964997887611389, "rewards/rejected": -0.5102577805519104, "step": 2790 }, { "epoch": 0.53, "learning_rate": 2.6453620722761897e-06, "logits/chosen": -1.4313132762908936, "logits/rejected": -0.676213264465332, "logps/chosen": -783.4808349609375, "logps/rejected": -1314.5399169921875, "loss": 0.0723, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.26105982065200806, "rewards/margins": 0.27394813299179077, "rewards/rejected": -0.5350079536437988, "step": 2800 }, { "epoch": 0.54, "learning_rate": 2.628764920254435e-06, "logits/chosen": -1.4004179239273071, "logits/rejected": -1.0551695823669434, "logps/chosen": -716.8417358398438, "logps/rejected": -1216.4146728515625, "loss": 0.1078, "rewards/accuracies": 0.75, "rewards/chosen": -0.2723211646080017, "rewards/margins": 0.2596574127674103, "rewards/rejected": -0.5319786071777344, "step": 2810 }, { "epoch": 0.54, "learning_rate": 2.6121620758762877e-06, "logits/chosen": -1.4957189559936523, "logits/rejected": -1.1808147430419922, "logps/chosen": -696.1345825195312, "logps/rejected": -1231.830078125, "loss": 0.0986, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2595438063144684, "rewards/margins": 0.25465917587280273, "rewards/rejected": -0.5142029523849487, "step": 2820 }, { "epoch": 0.54, "learning_rate": 2.595554273109564e-06, "logits/chosen": -1.7410948276519775, "logits/rejected": -1.0684701204299927, "logps/chosen": -658.7049560546875, "logps/rejected": -1376.8575439453125, "loss": 0.0553, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.18883931636810303, "rewards/margins": 0.3513220548629761, "rewards/rejected": -0.5401612520217896, "step": 2830 }, { "epoch": 0.54, "learning_rate": 2.5789422461412776e-06, "logits/chosen": -1.5545628070831299, "logits/rejected": -0.9545906782150269, "logps/chosen": -719.914306640625, "logps/rejected": -1349.597412109375, "loss": 0.1006, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17915014922618866, "rewards/margins": 0.30565065145492554, "rewards/rejected": -0.4848008155822754, "step": 2840 }, { "epoch": 0.54, "learning_rate": 2.5623267293451827e-06, "logits/chosen": -1.498730182647705, "logits/rejected": -1.021172285079956, "logps/chosen": -648.0553588867188, "logps/rejected": -1348.339111328125, "loss": 0.0607, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1289513111114502, "rewards/margins": 0.3190954327583313, "rewards/rejected": -0.4480466842651367, "step": 2850 }, { "epoch": 0.54, "learning_rate": 2.5457084572493094e-06, "logits/chosen": -1.4058417081832886, "logits/rejected": -1.2573301792144775, "logps/chosen": -549.1610717773438, "logps/rejected": -1284.826904296875, "loss": 0.0763, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1448233425617218, "rewards/margins": 0.268394410610199, "rewards/rejected": -0.41321778297424316, "step": 2860 }, { "epoch": 0.55, "learning_rate": 2.5290881645034932e-06, "logits/chosen": -1.7953014373779297, "logits/rejected": -1.1695835590362549, "logps/chosen": -652.8411254882812, "logps/rejected": -1216.7479248046875, "loss": 0.0644, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19003698229789734, "rewards/margins": 0.2580549716949463, "rewards/rejected": -0.44809192419052124, "step": 2870 }, { "epoch": 0.55, "learning_rate": 2.5124665858468956e-06, "logits/chosen": -1.5785634517669678, "logits/rejected": -1.002070665359497, "logps/chosen": -760.0968627929688, "logps/rejected": -1325.75, "loss": 0.0795, "rewards/accuracies": 0.875, "rewards/chosen": -0.22499170899391174, "rewards/margins": 0.2633865177631378, "rewards/rejected": -0.48837822675704956, "step": 2880 }, { "epoch": 0.55, "learning_rate": 2.4958444560755268e-06, "logits/chosen": -1.6666443347930908, "logits/rejected": -1.2207131385803223, "logps/chosen": -703.9513549804688, "logps/rejected": -1183.821533203125, "loss": 0.0872, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2172054499387741, "rewards/margins": 0.22457167506217957, "rewards/rejected": -0.44177716970443726, "step": 2890 }, { "epoch": 0.55, "learning_rate": 2.479222510009758e-06, "logits/chosen": -1.6804230213165283, "logits/rejected": -1.270926833152771, "logps/chosen": -664.9152221679688, "logps/rejected": -1291.96240234375, "loss": 0.0988, "rewards/accuracies": 0.875, "rewards/chosen": -0.1930864155292511, "rewards/margins": 0.2714272141456604, "rewards/rejected": -0.46451354026794434, "step": 2900 }, { "epoch": 0.55, "learning_rate": 2.4626014824618418e-06, "logits/chosen": -1.6413965225219727, "logits/rejected": -1.0755680799484253, "logps/chosen": -685.9994506835938, "logps/rejected": -1280.0340576171875, "loss": 0.0743, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1401532143354416, "rewards/margins": 0.2932918667793274, "rewards/rejected": -0.4334450364112854, "step": 2910 }, { "epoch": 0.56, "learning_rate": 2.445982108203422e-06, "logits/chosen": -1.56205415725708, "logits/rejected": -1.39645516872406, "logps/chosen": -594.72509765625, "logps/rejected": -1270.767822265625, "loss": 0.0888, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15568089485168457, "rewards/margins": 0.2737763524055481, "rewards/rejected": -0.42945727705955505, "step": 2920 }, { "epoch": 0.56, "learning_rate": 2.4293651219330614e-06, "logits/chosen": -1.7054529190063477, "logits/rejected": -1.0471127033233643, "logps/chosen": -643.4332275390625, "logps/rejected": -1266.702880859375, "loss": 0.079, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15120860934257507, "rewards/margins": 0.2843385338783264, "rewards/rejected": -0.4355471134185791, "step": 2930 }, { "epoch": 0.56, "learning_rate": 2.4127512582437486e-06, "logits/chosen": -1.6374223232269287, "logits/rejected": -1.173452615737915, "logps/chosen": -560.0069580078125, "logps/rejected": -1163.2095947265625, "loss": 0.0735, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13395628333091736, "rewards/margins": 0.2982594072818756, "rewards/rejected": -0.43221569061279297, "step": 2940 }, { "epoch": 0.56, "learning_rate": 2.3961412515904337e-06, "logits/chosen": -1.756450891494751, "logits/rejected": -0.9547022581100464, "logps/chosen": -693.9270629882812, "logps/rejected": -1390.892822265625, "loss": 0.0401, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.15946152806282043, "rewards/margins": 0.3631231188774109, "rewards/rejected": -0.5225846767425537, "step": 2950 }, { "epoch": 0.56, "learning_rate": 2.3795358362575618e-06, "logits/chosen": -1.6137168407440186, "logits/rejected": -1.2999922037124634, "logps/chosen": -619.2698974609375, "logps/rejected": -1225.082275390625, "loss": 0.0917, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22298340499401093, "rewards/margins": 0.22996529936790466, "rewards/rejected": -0.452948659658432, "step": 2960 }, { "epoch": 0.57, "learning_rate": 2.3629357463266e-06, "logits/chosen": -1.5306415557861328, "logits/rejected": -0.989396870136261, "logps/chosen": -595.4459228515625, "logps/rejected": -1328.7762451171875, "loss": 0.0522, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.16768696904182434, "rewards/margins": 0.3273896276950836, "rewards/rejected": -0.49507659673690796, "step": 2970 }, { "epoch": 0.57, "learning_rate": 2.346341715643601e-06, "logits/chosen": -1.6781136989593506, "logits/rejected": -0.9793834686279297, "logps/chosen": -635.3900756835938, "logps/rejected": -1335.785888671875, "loss": 0.0636, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18477866053581238, "rewards/margins": 0.3320106863975525, "rewards/rejected": -0.5167893171310425, "step": 2980 }, { "epoch": 0.57, "learning_rate": 2.32975447778675e-06, "logits/chosen": -1.6208549737930298, "logits/rejected": -1.226190447807312, "logps/chosen": -601.8873291015625, "logps/rejected": -1200.184326171875, "loss": 0.0824, "rewards/accuracies": 0.875, "rewards/chosen": -0.17440524697303772, "rewards/margins": 0.2568731904029846, "rewards/rejected": -0.43127840757369995, "step": 2990 }, { "epoch": 0.57, "learning_rate": 2.3131747660339396e-06, "logits/chosen": -1.2349191904067993, "logits/rejected": -0.8947417140007019, "logps/chosen": -656.5274658203125, "logps/rejected": -1464.7275390625, "loss": 0.0467, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21201591193675995, "rewards/margins": 0.3637722134590149, "rewards/rejected": -0.575788140296936, "step": 3000 }, { "epoch": 0.57, "learning_rate": 2.296603313330355e-06, "logits/chosen": -1.4812730550765991, "logits/rejected": -1.0813909769058228, "logps/chosen": -688.93115234375, "logps/rejected": -1384.00439453125, "loss": 0.0604, "rewards/accuracies": 0.875, "rewards/chosen": -0.2448856085538864, "rewards/margins": 0.29885345697402954, "rewards/rejected": -0.5437390208244324, "step": 3010 }, { "epoch": 0.58, "learning_rate": 2.280040852256068e-06, "logits/chosen": -1.5478495359420776, "logits/rejected": -1.0680512189865112, "logps/chosen": -661.108154296875, "logps/rejected": -1224.5616455078125, "loss": 0.0747, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.213933065533638, "rewards/margins": 0.2910975515842438, "rewards/rejected": -0.5050305724143982, "step": 3020 }, { "epoch": 0.58, "learning_rate": 2.2634881149936576e-06, "logits/chosen": -1.3864612579345703, "logits/rejected": -0.871688187122345, "logps/chosen": -667.6625366210938, "logps/rejected": -1298.525390625, "loss": 0.0563, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21446280181407928, "rewards/margins": 0.3170791268348694, "rewards/rejected": -0.5315419435501099, "step": 3030 }, { "epoch": 0.58, "learning_rate": 2.246945833295836e-06, "logits/chosen": -1.7432661056518555, "logits/rejected": -0.9063519239425659, "logps/chosen": -715.7513427734375, "logps/rejected": -1345.704833984375, "loss": 0.0455, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.20654010772705078, "rewards/margins": 0.3397827744483948, "rewards/rejected": -0.5463228821754456, "step": 3040 }, { "epoch": 0.58, "learning_rate": 2.230414738453104e-06, "logits/chosen": -1.5082430839538574, "logits/rejected": -0.8369771838188171, "logps/chosen": -656.2059936523438, "logps/rejected": -1308.8939208984375, "loss": 0.0631, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18457624316215515, "rewards/margins": 0.3191223740577698, "rewards/rejected": -0.5036987066268921, "step": 3050 }, { "epoch": 0.58, "learning_rate": 2.2138955612614206e-06, "logits/chosen": -1.4841004610061646, "logits/rejected": -0.8255208730697632, "logps/chosen": -734.7943725585938, "logps/rejected": -1287.914794921875, "loss": 0.0714, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20491111278533936, "rewards/margins": 0.31542059779167175, "rewards/rejected": -0.5203317403793335, "step": 3060 }, { "epoch": 0.58, "learning_rate": 2.1973890319898965e-06, "logits/chosen": -1.4568408727645874, "logits/rejected": -0.9991839528083801, "logps/chosen": -693.023193359375, "logps/rejected": -1258.9954833984375, "loss": 0.0909, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21499352157115936, "rewards/margins": 0.2714117467403412, "rewards/rejected": -0.48640528321266174, "step": 3070 }, { "epoch": 0.59, "learning_rate": 2.1808958803485134e-06, "logits/chosen": -1.5070273876190186, "logits/rejected": -1.0528619289398193, "logps/chosen": -634.9210815429688, "logps/rejected": -1429.14892578125, "loss": 0.0652, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19342589378356934, "rewards/margins": 0.32702723145484924, "rewards/rejected": -0.5204530954360962, "step": 3080 }, { "epoch": 0.59, "learning_rate": 2.1644168354558623e-06, "logits/chosen": -1.6371214389801025, "logits/rejected": -1.3708134889602661, "logps/chosen": -541.59765625, "logps/rejected": -1365.823486328125, "loss": 0.0669, "rewards/accuracies": 0.875, "rewards/chosen": -0.16047021746635437, "rewards/margins": 0.34046122431755066, "rewards/rejected": -0.500931441783905, "step": 3090 }, { "epoch": 0.59, "learning_rate": 2.1479526258069086e-06, "logits/chosen": -1.6579856872558594, "logits/rejected": -1.1512315273284912, "logps/chosen": -600.4952392578125, "logps/rejected": -1301.325439453125, "loss": 0.074, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16126033663749695, "rewards/margins": 0.2973024845123291, "rewards/rejected": -0.45856285095214844, "step": 3100 }, { "epoch": 0.59, "learning_rate": 2.1315039792407975e-06, "logits/chosen": -1.5018136501312256, "logits/rejected": -1.0350725650787354, "logps/chosen": -703.773193359375, "logps/rejected": -1416.766357421875, "loss": 0.0452, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.16547882556915283, "rewards/margins": 0.33334147930145264, "rewards/rejected": -0.49882030487060547, "step": 3110 }, { "epoch": 0.59, "learning_rate": 2.115071622908666e-06, "logits/chosen": -1.3968632221221924, "logits/rejected": -0.7743858098983765, "logps/chosen": -599.1119384765625, "logps/rejected": -1345.932861328125, "loss": 0.0504, "rewards/accuracies": 0.875, "rewards/chosen": -0.1530262678861618, "rewards/margins": 0.3215201497077942, "rewards/rejected": -0.4745463728904724, "step": 3120 }, { "epoch": 0.6, "learning_rate": 2.0986562832415063e-06, "logits/chosen": -1.4558215141296387, "logits/rejected": -1.0328631401062012, "logps/chosen": -573.017333984375, "logps/rejected": -1261.2379150390625, "loss": 0.0855, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14629222452640533, "rewards/margins": 0.30260735750198364, "rewards/rejected": -0.4488995671272278, "step": 3130 }, { "epoch": 0.6, "learning_rate": 2.082258685918047e-06, "logits/chosen": -1.8028316497802734, "logits/rejected": -1.0838000774383545, "logps/chosen": -706.5945434570312, "logps/rejected": -1400.1011962890625, "loss": 0.0433, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.18354645371437073, "rewards/margins": 0.3293781578540802, "rewards/rejected": -0.5129246115684509, "step": 3140 }, { "epoch": 0.6, "learning_rate": 2.0658795558326745e-06, "logits/chosen": -1.8446155786514282, "logits/rejected": -1.05807626247406, "logps/chosen": -743.2035522460938, "logps/rejected": -1389.4332275390625, "loss": 0.0609, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18256376683712006, "rewards/margins": 0.33897337317466736, "rewards/rejected": -0.5215371251106262, "step": 3150 }, { "epoch": 0.6, "learning_rate": 2.049519617063389e-06, "logits/chosen": -1.805032730102539, "logits/rejected": -0.8840486407279968, "logps/chosen": -731.5299072265625, "logps/rejected": -1289.933837890625, "loss": 0.0568, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.17051993310451508, "rewards/margins": 0.3140178620815277, "rewards/rejected": -0.4845377802848816, "step": 3160 }, { "epoch": 0.6, "learning_rate": 2.033179592839792e-06, "logits/chosen": -1.3933377265930176, "logits/rejected": -1.25589919090271, "logps/chosen": -505.409423828125, "logps/rejected": -1189.5247802734375, "loss": 0.0853, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.14680811762809753, "rewards/margins": 0.26954832673072815, "rewards/rejected": -0.41635647416114807, "step": 3170 }, { "epoch": 0.61, "learning_rate": 2.0168602055111175e-06, "logits/chosen": -1.468627691268921, "logits/rejected": -1.2611991167068481, "logps/chosen": -578.4532470703125, "logps/rejected": -1260.8515625, "loss": 0.0684, "rewards/accuracies": 0.875, "rewards/chosen": -0.17467227578163147, "rewards/margins": 0.2742113769054413, "rewards/rejected": -0.44888362288475037, "step": 3180 }, { "epoch": 0.61, "learning_rate": 2.0005621765142942e-06, "logits/chosen": -1.692848563194275, "logits/rejected": -1.1767462491989136, "logps/chosen": -616.2111206054688, "logps/rejected": -1344.800048828125, "loss": 0.0624, "rewards/accuracies": 0.875, "rewards/chosen": -0.190839484333992, "rewards/margins": 0.32105207443237305, "rewards/rejected": -0.5118916034698486, "step": 3190 }, { "epoch": 0.61, "learning_rate": 1.9842862263420565e-06, "logits/chosen": -1.5659022331237793, "logits/rejected": -0.8357075452804565, "logps/chosen": -612.3272705078125, "logps/rejected": -1096.1163330078125, "loss": 0.1089, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17919543385505676, "rewards/margins": 0.2390226423740387, "rewards/rejected": -0.41821813583374023, "step": 3200 }, { "epoch": 0.61, "learning_rate": 1.9680330745110954e-06, "logits/chosen": -1.4714428186416626, "logits/rejected": -0.810672402381897, "logps/chosen": -645.9213256835938, "logps/rejected": -1339.7169189453125, "loss": 0.0485, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20404461026191711, "rewards/margins": 0.3189607262611389, "rewards/rejected": -0.5230053663253784, "step": 3210 }, { "epoch": 0.61, "learning_rate": 1.9518034395302413e-06, "logits/chosen": -1.4850611686706543, "logits/rejected": -1.0046916007995605, "logps/chosen": -649.9691772460938, "logps/rejected": -1234.4267578125, "loss": 0.0648, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21667401492595673, "rewards/margins": 0.28007322549819946, "rewards/rejected": -0.4967471957206726, "step": 3220 }, { "epoch": 0.62, "learning_rate": 1.9355980388687145e-06, "logits/chosen": -1.521734595298767, "logits/rejected": -0.9709982872009277, "logps/chosen": -611.9412231445312, "logps/rejected": -1343.318115234375, "loss": 0.0673, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19162626564502716, "rewards/margins": 0.3469206690788269, "rewards/rejected": -0.5385469198226929, "step": 3230 }, { "epoch": 0.62, "learning_rate": 1.9194175889243942e-06, "logits/chosen": -1.5944442749023438, "logits/rejected": -0.9859330058097839, "logps/chosen": -717.7816162109375, "logps/rejected": -1519.260986328125, "loss": 0.0467, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.22277486324310303, "rewards/margins": 0.34965479373931885, "rewards/rejected": -0.5724297165870667, "step": 3240 }, { "epoch": 0.62, "learning_rate": 1.903262804992156e-06, "logits/chosen": -1.6996046304702759, "logits/rejected": -0.7255717515945435, "logps/chosen": -795.25, "logps/rejected": -1436.568603515625, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": -0.22043243050575256, "rewards/margins": 0.35897698998451233, "rewards/rejected": -0.5794094204902649, "step": 3250 }, { "epoch": 0.62, "learning_rate": 1.8871344012322504e-06, "logits/chosen": -1.5481312274932861, "logits/rejected": -0.7875708341598511, "logps/chosen": -722.6868896484375, "logps/rejected": -1411.4429931640625, "loss": 0.0572, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.22615578770637512, "rewards/margins": 0.3459186851978302, "rewards/rejected": -0.5720744729042053, "step": 3260 }, { "epoch": 0.62, "learning_rate": 1.8710330906387288e-06, "logits/chosen": -1.4675686359405518, "logits/rejected": -0.9357894659042358, "logps/chosen": -694.9276123046875, "logps/rejected": -1344.5860595703125, "loss": 0.067, "rewards/accuracies": 0.875, "rewards/chosen": -0.21511349081993103, "rewards/margins": 0.31824225187301636, "rewards/rejected": -0.5333557724952698, "step": 3270 }, { "epoch": 0.62, "learning_rate": 1.8549595850079272e-06, "logits/chosen": -1.5443395376205444, "logits/rejected": -0.9139049649238586, "logps/chosen": -652.799560546875, "logps/rejected": -1310.782470703125, "loss": 0.0584, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19704267382621765, "rewards/margins": 0.312825083732605, "rewards/rejected": -0.509867787361145, "step": 3280 }, { "epoch": 0.63, "learning_rate": 1.8389145949069953e-06, "logits/chosen": -1.6866626739501953, "logits/rejected": -0.9042354822158813, "logps/chosen": -759.4832763671875, "logps/rejected": -1389.257568359375, "loss": 0.0524, "rewards/accuracies": 0.875, "rewards/chosen": -0.20711025595664978, "rewards/margins": 0.34287315607070923, "rewards/rejected": -0.5499833822250366, "step": 3290 }, { "epoch": 0.63, "learning_rate": 1.8228988296424877e-06, "logits/chosen": -1.5796806812286377, "logits/rejected": -0.8186532855033875, "logps/chosen": -722.0853271484375, "logps/rejected": -1205.07177734375, "loss": 0.0932, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1870838701725006, "rewards/margins": 0.267849862575531, "rewards/rejected": -0.4549337327480316, "step": 3300 }, { "epoch": 0.63, "learning_rate": 1.806912997229008e-06, "logits/chosen": -1.510692834854126, "logits/rejected": -0.9041181802749634, "logps/chosen": -691.3726806640625, "logps/rejected": -1267.9361572265625, "loss": 0.0598, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.21052590012550354, "rewards/margins": 0.30054470896720886, "rewards/rejected": -0.5110706090927124, "step": 3310 }, { "epoch": 0.63, "learning_rate": 1.7909578043579037e-06, "logits/chosen": -1.4253116846084595, "logits/rejected": -0.8056136965751648, "logps/chosen": -646.0543823242188, "logps/rejected": -1144.4342041015625, "loss": 0.0866, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21351024508476257, "rewards/margins": 0.2579920291900635, "rewards/rejected": -0.4715022146701813, "step": 3320 }, { "epoch": 0.63, "learning_rate": 1.7750339563660346e-06, "logits/chosen": -1.5773307085037231, "logits/rejected": -0.722690761089325, "logps/chosen": -700.0763549804688, "logps/rejected": -1403.1580810546875, "loss": 0.0617, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.21982452273368835, "rewards/margins": 0.33400195837020874, "rewards/rejected": -0.5538265109062195, "step": 3330 }, { "epoch": 0.64, "learning_rate": 1.759142157204583e-06, "logits/chosen": -1.1075875759124756, "logits/rejected": -0.9842613935470581, "logps/chosen": -617.412353515625, "logps/rejected": -1335.633056640625, "loss": 0.0695, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.21973755955696106, "rewards/margins": 0.2990453541278839, "rewards/rejected": -0.5187829732894897, "step": 3340 }, { "epoch": 0.64, "learning_rate": 1.7432831094079357e-06, "logits/chosen": -1.3947752714157104, "logits/rejected": -0.9352839589118958, "logps/chosen": -711.1446533203125, "logps/rejected": -1314.2042236328125, "loss": 0.0829, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2424613982439041, "rewards/margins": 0.2729516923427582, "rewards/rejected": -0.5154131650924683, "step": 3350 }, { "epoch": 0.64, "learning_rate": 1.7274575140626318e-06, "logits/chosen": -1.620173692703247, "logits/rejected": -1.1578094959259033, "logps/chosen": -678.2091064453125, "logps/rejected": -1377.6026611328125, "loss": 0.0666, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21755430102348328, "rewards/margins": 0.3121327757835388, "rewards/rejected": -0.5296871066093445, "step": 3360 }, { "epoch": 0.64, "learning_rate": 1.7116660707763637e-06, "logits/chosen": -1.5007514953613281, "logits/rejected": -0.9255658388137817, "logps/chosen": -656.914794921875, "logps/rejected": -1312.096435546875, "loss": 0.0762, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19928193092346191, "rewards/margins": 0.29995498061180115, "rewards/rejected": -0.49923691153526306, "step": 3370 }, { "epoch": 0.64, "learning_rate": 1.695909477647054e-06, "logits/chosen": -1.5323008298873901, "logits/rejected": -0.8067652583122253, "logps/chosen": -598.3023071289062, "logps/rejected": -1327.044921875, "loss": 0.0575, "rewards/accuracies": 0.875, "rewards/chosen": -0.16166332364082336, "rewards/margins": 0.3267229497432709, "rewards/rejected": -0.4883863031864166, "step": 3380 }, { "epoch": 0.65, "learning_rate": 1.6801884312319893e-06, "logits/chosen": -1.3339112997055054, "logits/rejected": -0.9970804452896118, "logps/chosen": -614.9922485351562, "logps/rejected": -1234.631591796875, "loss": 0.0734, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1723160296678543, "rewards/margins": 0.2858824133872986, "rewards/rejected": -0.4581984579563141, "step": 3390 }, { "epoch": 0.65, "learning_rate": 1.6645036265170314e-06, "logits/chosen": -1.3838064670562744, "logits/rejected": -0.9229308366775513, "logps/chosen": -748.576904296875, "logps/rejected": -1306.68212890625, "loss": 0.095, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2202054262161255, "rewards/margins": 0.27225932478904724, "rewards/rejected": -0.49246472120285034, "step": 3400 }, { "epoch": 0.65, "learning_rate": 1.648855756885893e-06, "logits/chosen": -1.4950006008148193, "logits/rejected": -1.013434648513794, "logps/chosen": -718.4817504882812, "logps/rejected": -1454.3203125, "loss": 0.0622, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.22334542870521545, "rewards/margins": 0.3394310474395752, "rewards/rejected": -0.5627764463424683, "step": 3410 }, { "epoch": 0.65, "learning_rate": 1.633245514089482e-06, "logits/chosen": -1.6091363430023193, "logits/rejected": -1.1225229501724243, "logps/chosen": -668.1934204101562, "logps/rejected": -1322.8450927734375, "loss": 0.0616, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1633572280406952, "rewards/margins": 0.3172319829463959, "rewards/rejected": -0.48058921098709106, "step": 3420 }, { "epoch": 0.65, "learning_rate": 1.6176735882153284e-06, "logits/chosen": -1.6472145318984985, "logits/rejected": -1.1272056102752686, "logps/chosen": -654.9362182617188, "logps/rejected": -1358.408447265625, "loss": 0.0646, "rewards/accuracies": 0.875, "rewards/chosen": -0.19133299589157104, "rewards/margins": 0.30245405435562134, "rewards/rejected": -0.49378710985183716, "step": 3430 }, { "epoch": 0.66, "learning_rate": 1.6021406676570667e-06, "logits/chosen": -1.4853413105010986, "logits/rejected": -0.9434601068496704, "logps/chosen": -712.5463256835938, "logps/rejected": -1411.4658203125, "loss": 0.0628, "rewards/accuracies": 0.875, "rewards/chosen": -0.20891955494880676, "rewards/margins": 0.3422708511352539, "rewards/rejected": -0.5511903166770935, "step": 3440 }, { "epoch": 0.66, "learning_rate": 1.5866474390840126e-06, "logits/chosen": -1.6828727722167969, "logits/rejected": -0.8747915029525757, "logps/chosen": -718.73486328125, "logps/rejected": -1527.1334228515625, "loss": 0.0433, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.19726070761680603, "rewards/margins": 0.3979165554046631, "rewards/rejected": -0.5951772928237915, "step": 3450 }, { "epoch": 0.66, "learning_rate": 1.5711945874108053e-06, "logits/chosen": -1.3750511407852173, "logits/rejected": -0.9035806655883789, "logps/chosen": -599.4134521484375, "logps/rejected": -1339.314208984375, "loss": 0.0769, "rewards/accuracies": 0.875, "rewards/chosen": -0.17058123648166656, "rewards/margins": 0.3418346345424652, "rewards/rejected": -0.512415885925293, "step": 3460 }, { "epoch": 0.66, "learning_rate": 1.5557827957671249e-06, "logits/chosen": -1.5215259790420532, "logits/rejected": -1.087141752243042, "logps/chosen": -661.077880859375, "logps/rejected": -1228.581787109375, "loss": 0.0905, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21272769570350647, "rewards/margins": 0.26741451025009155, "rewards/rejected": -0.480142205953598, "step": 3470 }, { "epoch": 0.66, "learning_rate": 1.5404127454674994e-06, "logits/chosen": -1.716884970664978, "logits/rejected": -0.9167481660842896, "logps/chosen": -776.3599243164062, "logps/rejected": -1337.857666015625, "loss": 0.0815, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2485647201538086, "rewards/margins": 0.28456616401672363, "rewards/rejected": -0.5331308841705322, "step": 3480 }, { "epoch": 0.66, "learning_rate": 1.5250851159811809e-06, "logits/chosen": -1.3097341060638428, "logits/rejected": -1.1387548446655273, "logps/chosen": -596.4869995117188, "logps/rejected": -1371.023193359375, "loss": 0.068, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1898602694272995, "rewards/margins": 0.3254929780960083, "rewards/rejected": -0.515353262424469, "step": 3490 }, { "epoch": 0.67, "learning_rate": 1.509800584902108e-06, "logits/chosen": -1.5502290725708008, "logits/rejected": -0.9857848882675171, "logps/chosen": -671.3793334960938, "logps/rejected": -1286.765625, "loss": 0.0897, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21347641944885254, "rewards/margins": 0.252995103597641, "rewards/rejected": -0.4664715826511383, "step": 3500 }, { "epoch": 0.67, "learning_rate": 1.4945598279189565e-06, "logits/chosen": -1.3999189138412476, "logits/rejected": -1.0364853143692017, "logps/chosen": -541.5253295898438, "logps/rejected": -1361.39306640625, "loss": 0.0509, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15634627640247345, "rewards/margins": 0.3528437316417694, "rewards/rejected": -0.5091899633407593, "step": 3510 }, { "epoch": 0.67, "learning_rate": 1.4793635187852622e-06, "logits/chosen": -1.800374984741211, "logits/rejected": -1.2006888389587402, "logps/chosen": -622.7084350585938, "logps/rejected": -1129.7310791015625, "loss": 0.078, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17157098650932312, "rewards/margins": 0.2593713402748108, "rewards/rejected": -0.4309422969818115, "step": 3520 }, { "epoch": 0.67, "learning_rate": 1.4642123292896406e-06, "logits/chosen": -1.7451728582382202, "logits/rejected": -0.9990085363388062, "logps/chosen": -551.0404052734375, "logps/rejected": -1164.4862060546875, "loss": 0.0647, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1497395783662796, "rewards/margins": 0.30090680718421936, "rewards/rejected": -0.45064640045166016, "step": 3530 }, { "epoch": 0.67, "learning_rate": 1.4491069292260867e-06, "logits/chosen": -1.7180376052856445, "logits/rejected": -1.0250861644744873, "logps/chosen": -687.5936889648438, "logps/rejected": -1251.387451171875, "loss": 0.0717, "rewards/accuracies": 0.875, "rewards/chosen": -0.18078365921974182, "rewards/margins": 0.2989780306816101, "rewards/rejected": -0.47976168990135193, "step": 3540 }, { "epoch": 0.68, "learning_rate": 1.4340479863643658e-06, "logits/chosen": -1.4122458696365356, "logits/rejected": -1.0276893377304077, "logps/chosen": -746.4142456054688, "logps/rejected": -1390.8551025390625, "loss": 0.0702, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19428804516792297, "rewards/margins": 0.2959034740924835, "rewards/rejected": -0.4901915192604065, "step": 3550 }, { "epoch": 0.68, "learning_rate": 1.4190361664204936e-06, "logits/chosen": -1.545143485069275, "logits/rejected": -1.0826115608215332, "logps/chosen": -602.2059936523438, "logps/rejected": -1251.728759765625, "loss": 0.0839, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16832391917705536, "rewards/margins": 0.2858714163303375, "rewards/rejected": -0.4541953206062317, "step": 3560 }, { "epoch": 0.68, "learning_rate": 1.4040721330273063e-06, "logits/chosen": -1.661960244178772, "logits/rejected": -1.3034007549285889, "logps/chosen": -693.1444091796875, "logps/rejected": -1337.0006103515625, "loss": 0.0833, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21670281887054443, "rewards/margins": 0.26909923553466797, "rewards/rejected": -0.4858019948005676, "step": 3570 }, { "epoch": 0.68, "learning_rate": 1.3891565477051242e-06, "logits/chosen": -1.6313354969024658, "logits/rejected": -1.0461688041687012, "logps/chosen": -574.7815551757812, "logps/rejected": -1244.134033203125, "loss": 0.0504, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1575133502483368, "rewards/margins": 0.31078487634658813, "rewards/rejected": -0.4682982563972473, "step": 3580 }, { "epoch": 0.68, "learning_rate": 1.3742900698325034e-06, "logits/chosen": -1.6138801574707031, "logits/rejected": -0.969835638999939, "logps/chosen": -736.7557983398438, "logps/rejected": -1412.3299560546875, "loss": 0.0537, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2180413454771042, "rewards/margins": 0.32191744446754456, "rewards/rejected": -0.5399588346481323, "step": 3590 }, { "epoch": 0.69, "learning_rate": 1.3594733566170925e-06, "logits/chosen": -1.5131175518035889, "logits/rejected": -0.9915425181388855, "logps/chosen": -801.5447387695312, "logps/rejected": -1574.994140625, "loss": 0.055, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2719317674636841, "rewards/margins": 0.35427626967430115, "rewards/rejected": -0.6262079477310181, "step": 3600 }, { "epoch": 0.69, "learning_rate": 1.3447070630665771e-06, "logits/chosen": -1.4314167499542236, "logits/rejected": -0.9526249170303345, "logps/chosen": -641.8742065429688, "logps/rejected": -1273.2801513671875, "loss": 0.0583, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21152277290821075, "rewards/margins": 0.302289217710495, "rewards/rejected": -0.5138119459152222, "step": 3610 }, { "epoch": 0.69, "learning_rate": 1.329991841959717e-06, "logits/chosen": -1.6229140758514404, "logits/rejected": -0.997315526008606, "logps/chosen": -706.7448120117188, "logps/rejected": -1259.075927734375, "loss": 0.0891, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2167361080646515, "rewards/margins": 0.2824261486530304, "rewards/rejected": -0.4991622567176819, "step": 3620 }, { "epoch": 0.69, "learning_rate": 1.3153283438175036e-06, "logits/chosen": -1.5434691905975342, "logits/rejected": -1.0687668323516846, "logps/chosen": -678.1273193359375, "logps/rejected": -1346.504150390625, "loss": 0.0733, "rewards/accuracies": 0.875, "rewards/chosen": -0.22936749458312988, "rewards/margins": 0.3146396279335022, "rewards/rejected": -0.5440071225166321, "step": 3630 }, { "epoch": 0.69, "learning_rate": 1.3007172168743854e-06, "logits/chosen": -1.5151026248931885, "logits/rejected": -1.2111207246780396, "logps/chosen": -617.3922729492188, "logps/rejected": -1312.60986328125, "loss": 0.0612, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21233785152435303, "rewards/margins": 0.29888466000556946, "rewards/rejected": -0.5112224817276001, "step": 3640 }, { "epoch": 0.7, "learning_rate": 1.2861591070496193e-06, "logits/chosen": -1.5453293323516846, "logits/rejected": -0.8888516426086426, "logps/chosen": -743.7242431640625, "logps/rejected": -1490.795654296875, "loss": 0.0782, "rewards/accuracies": 0.875, "rewards/chosen": -0.24507150053977966, "rewards/margins": 0.3426440358161926, "rewards/rejected": -0.5877156257629395, "step": 3650 }, { "epoch": 0.7, "learning_rate": 1.271654657918722e-06, "logits/chosen": -1.7394828796386719, "logits/rejected": -1.1618573665618896, "logps/chosen": -536.0321655273438, "logps/rejected": -1236.074951171875, "loss": 0.0636, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15857215225696564, "rewards/margins": 0.3111765384674072, "rewards/rejected": -0.46974867582321167, "step": 3660 }, { "epoch": 0.7, "learning_rate": 1.2572045106850051e-06, "logits/chosen": -1.6560094356536865, "logits/rejected": -0.9025988578796387, "logps/chosen": -661.8654174804688, "logps/rejected": -1258.066650390625, "loss": 0.0749, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18976400792598724, "rewards/margins": 0.3110305964946747, "rewards/rejected": -0.5007945895195007, "step": 3670 }, { "epoch": 0.7, "learning_rate": 1.2428093041512418e-06, "logits/chosen": -1.7884883880615234, "logits/rejected": -1.1757820844650269, "logps/chosen": -727.03564453125, "logps/rejected": -1301.129150390625, "loss": 0.08, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1926560401916504, "rewards/margins": 0.28592005372047424, "rewards/rejected": -0.478576123714447, "step": 3680 }, { "epoch": 0.7, "learning_rate": 1.2284696746914216e-06, "logits/chosen": -1.497154951095581, "logits/rejected": -1.116506814956665, "logps/chosen": -680.4603271484375, "logps/rejected": -1272.1434326171875, "loss": 0.0745, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20678162574768066, "rewards/margins": 0.25609079003334045, "rewards/rejected": -0.4628724455833435, "step": 3690 }, { "epoch": 0.7, "learning_rate": 1.2141862562226164e-06, "logits/chosen": -1.6773204803466797, "logits/rejected": -0.9909335374832153, "logps/chosen": -633.2793579101562, "logps/rejected": -1360.771728515625, "loss": 0.0668, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1755729764699936, "rewards/margins": 0.3312123715877533, "rewards/rejected": -0.5067853927612305, "step": 3700 }, { "epoch": 0.71, "learning_rate": 1.1999596801769617e-06, "logits/chosen": -1.7998435497283936, "logits/rejected": -0.9226717948913574, "logps/chosen": -769.0552368164062, "logps/rejected": -1420.398193359375, "loss": 0.0709, "rewards/accuracies": 0.875, "rewards/chosen": -0.21222510933876038, "rewards/margins": 0.3229183554649353, "rewards/rejected": -0.5351434946060181, "step": 3710 }, { "epoch": 0.71, "learning_rate": 1.185790575473738e-06, "logits/chosen": -1.606642484664917, "logits/rejected": -1.2084159851074219, "logps/chosen": -753.2423706054688, "logps/rejected": -1257.906982421875, "loss": 0.0969, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23726069927215576, "rewards/margins": 0.23133531212806702, "rewards/rejected": -0.4685959815979004, "step": 3720 }, { "epoch": 0.71, "learning_rate": 1.1716795684915728e-06, "logits/chosen": -1.6280252933502197, "logits/rejected": -0.9002641439437866, "logps/chosen": -669.9718627929688, "logps/rejected": -1360.99658203125, "loss": 0.0626, "rewards/accuracies": 0.875, "rewards/chosen": -0.16807326674461365, "rewards/margins": 0.33500486612319946, "rewards/rejected": -0.5030781626701355, "step": 3730 }, { "epoch": 0.71, "learning_rate": 1.1576272830407418e-06, "logits/chosen": -1.6571365594863892, "logits/rejected": -1.1870951652526855, "logps/chosen": -640.5313110351562, "logps/rejected": -1359.805419921875, "loss": 0.0782, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1662045419216156, "rewards/margins": 0.3272945284843445, "rewards/rejected": -0.49349913001060486, "step": 3740 }, { "epoch": 0.71, "learning_rate": 1.1436343403356019e-06, "logits/chosen": -1.714442253112793, "logits/rejected": -0.9745224714279175, "logps/chosen": -706.4690551757812, "logps/rejected": -1396.648681640625, "loss": 0.0649, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1968952864408493, "rewards/margins": 0.32389289140701294, "rewards/rejected": -0.5207881927490234, "step": 3750 }, { "epoch": 0.72, "learning_rate": 1.129701358967123e-06, "logits/chosen": -1.73860764503479, "logits/rejected": -0.9712656736373901, "logps/chosen": -629.3460083007812, "logps/rejected": -1206.01953125, "loss": 0.0828, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14328864216804504, "rewards/margins": 0.2993341088294983, "rewards/rejected": -0.44262275099754333, "step": 3760 }, { "epoch": 0.72, "learning_rate": 1.11582895487554e-06, "logits/chosen": -1.5468307733535767, "logits/rejected": -0.9999405741691589, "logps/chosen": -550.5922241210938, "logps/rejected": -1237.1678466796875, "loss": 0.0858, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.16123315691947937, "rewards/margins": 0.283268004655838, "rewards/rejected": -0.4445011615753174, "step": 3770 }, { "epoch": 0.72, "learning_rate": 1.1020177413231334e-06, "logits/chosen": -1.824406385421753, "logits/rejected": -1.0949231386184692, "logps/chosen": -692.0006103515625, "logps/rejected": -1357.0108642578125, "loss": 0.0692, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18804143369197845, "rewards/margins": 0.304176390171051, "rewards/rejected": -0.4922178387641907, "step": 3780 }, { "epoch": 0.72, "learning_rate": 1.0882683288671041e-06, "logits/chosen": -1.5519713163375854, "logits/rejected": -1.2418644428253174, "logps/chosen": -621.3736572265625, "logps/rejected": -1320.0389404296875, "loss": 0.0743, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15430279076099396, "rewards/margins": 0.32646626234054565, "rewards/rejected": -0.4807690978050232, "step": 3790 }, { "epoch": 0.72, "learning_rate": 1.0745813253325957e-06, "logits/chosen": -1.5042496919631958, "logits/rejected": -1.1541450023651123, "logps/chosen": -581.3851318359375, "logps/rejected": -1167.1385498046875, "loss": 0.0711, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17610274255275726, "rewards/margins": 0.2804592549800873, "rewards/rejected": -0.45656198263168335, "step": 3800 }, { "epoch": 0.73, "learning_rate": 1.0609573357858166e-06, "logits/chosen": -1.8346351385116577, "logits/rejected": -0.9841367602348328, "logps/chosen": -615.1905517578125, "logps/rejected": -1213.364501953125, "loss": 0.1024, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18721064925193787, "rewards/margins": 0.26040130853652954, "rewards/rejected": -0.4476119577884674, "step": 3810 }, { "epoch": 0.73, "learning_rate": 1.0473969625072922e-06, "logits/chosen": -1.4209061861038208, "logits/rejected": -0.9002727270126343, "logps/chosen": -687.7305908203125, "logps/rejected": -1552.115478515625, "loss": 0.0392, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.23270201683044434, "rewards/margins": 0.35259899497032166, "rewards/rejected": -0.5853010416030884, "step": 3820 }, { "epoch": 0.73, "learning_rate": 1.0339008049652427e-06, "logits/chosen": -1.407459020614624, "logits/rejected": -0.9791488647460938, "logps/chosen": -621.3890380859375, "logps/rejected": -1234.590087890625, "loss": 0.0653, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17459668219089508, "rewards/margins": 0.29255175590515137, "rewards/rejected": -0.46714845299720764, "step": 3830 }, { "epoch": 0.73, "learning_rate": 1.0204694597890814e-06, "logits/chosen": -1.6580352783203125, "logits/rejected": -1.1533594131469727, "logps/chosen": -676.6832275390625, "logps/rejected": -1325.892822265625, "loss": 0.0564, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20397302508354187, "rewards/margins": 0.29500606656074524, "rewards/rejected": -0.4989790916442871, "step": 3840 }, { "epoch": 0.73, "learning_rate": 1.0071035207430352e-06, "logits/chosen": -1.2952769994735718, "logits/rejected": -0.9416033029556274, "logps/chosen": -604.2355346679688, "logps/rejected": -1212.9393310546875, "loss": 0.0484, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16119088232517242, "rewards/margins": 0.2786763310432434, "rewards/rejected": -0.43986719846725464, "step": 3850 }, { "epoch": 0.74, "learning_rate": 9.938035786999018e-07, "logits/chosen": -1.5834907293319702, "logits/rejected": -1.082273244857788, "logps/chosen": -560.4159545898438, "logps/rejected": -1141.927490234375, "loss": 0.1234, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16034545004367828, "rewards/margins": 0.24875816702842712, "rewards/rejected": -0.4091036319732666, "step": 3860 }, { "epoch": 0.74, "learning_rate": 9.805702216149252e-07, "logits/chosen": -1.6294996738433838, "logits/rejected": -0.9513868093490601, "logps/chosen": -647.7396240234375, "logps/rejected": -1253.830810546875, "loss": 0.0869, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1819881945848465, "rewards/margins": 0.3004476726055145, "rewards/rejected": -0.48243585228919983, "step": 3870 }, { "epoch": 0.74, "learning_rate": 9.674040344998056e-07, "logits/chosen": -1.6295925378799438, "logits/rejected": -1.107329249382019, "logps/chosen": -690.513916015625, "logps/rejected": -1182.1849365234375, "loss": 0.1012, "rewards/accuracies": 0.75, "rewards/chosen": -0.23783211410045624, "rewards/margins": 0.24144688248634338, "rewards/rejected": -0.47927895188331604, "step": 3880 }, { "epoch": 0.74, "learning_rate": 9.543055993968339e-07, "logits/chosen": -1.8603365421295166, "logits/rejected": -1.189885139465332, "logps/chosen": -682.6376953125, "logps/rejected": -1384.593994140625, "loss": 0.0565, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2013491690158844, "rewards/margins": 0.3509669601917267, "rewards/rejected": -0.5523160696029663, "step": 3890 }, { "epoch": 0.74, "learning_rate": 9.412754953531664e-07, "logits/chosen": -1.4683700799942017, "logits/rejected": -0.7776457071304321, "logps/chosen": -579.8306274414062, "logps/rejected": -1226.8836669921875, "loss": 0.0879, "rewards/accuracies": 0.75, "rewards/chosen": -0.1834620088338852, "rewards/margins": 0.3038709759712219, "rewards/rejected": -0.4873329699039459, "step": 3900 }, { "epoch": 0.74, "learning_rate": 9.283142983952231e-07, "logits/chosen": -1.4519298076629639, "logits/rejected": -0.9826549291610718, "logps/chosen": -702.67578125, "logps/rejected": -1460.3287353515625, "loss": 0.0591, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.22127850353717804, "rewards/margins": 0.35032838582992554, "rewards/rejected": -0.5716068744659424, "step": 3910 }, { "epoch": 0.75, "learning_rate": 9.154225815032242e-07, "logits/chosen": -1.4828869104385376, "logits/rejected": -1.0451923608779907, "logps/chosen": -605.3231201171875, "logps/rejected": -1357.6131591796875, "loss": 0.0581, "rewards/accuracies": 0.875, "rewards/chosen": -0.20290470123291016, "rewards/margins": 0.34218138456344604, "rewards/rejected": -0.5450860857963562, "step": 3920 }, { "epoch": 0.75, "learning_rate": 9.026009145858608e-07, "logits/chosen": -1.6691789627075195, "logits/rejected": -0.9547937512397766, "logps/chosen": -698.2928466796875, "logps/rejected": -1358.7264404296875, "loss": 0.0853, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19656509160995483, "rewards/margins": 0.3234030604362488, "rewards/rejected": -0.5199681520462036, "step": 3930 }, { "epoch": 0.75, "learning_rate": 8.898498644550973e-07, "logits/chosen": -1.5353500843048096, "logits/rejected": -1.1531541347503662, "logps/chosen": -608.4739990234375, "logps/rejected": -1238.1771240234375, "loss": 0.0929, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20965221524238586, "rewards/margins": 0.2650408148765564, "rewards/rejected": -0.47469305992126465, "step": 3940 }, { "epoch": 0.75, "learning_rate": 8.771699948011203e-07, "logits/chosen": -1.4269169569015503, "logits/rejected": -0.8833200335502625, "logps/chosen": -800.2471923828125, "logps/rejected": -1413.006591796875, "loss": 0.0603, "rewards/accuracies": 0.875, "rewards/chosen": -0.2683252692222595, "rewards/margins": 0.30723056197166443, "rewards/rejected": -0.5755558013916016, "step": 3950 }, { "epoch": 0.75, "learning_rate": 8.645618661674144e-07, "logits/chosen": -1.309072494506836, "logits/rejected": -1.012961983680725, "logps/chosen": -652.0698852539062, "logps/rejected": -1263.73876953125, "loss": 0.0663, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20567500591278076, "rewards/margins": 0.27669185400009155, "rewards/rejected": -0.4823668599128723, "step": 3960 }, { "epoch": 0.76, "learning_rate": 8.520260359259822e-07, "logits/chosen": -1.4296892881393433, "logits/rejected": -0.9210003614425659, "logps/chosen": -601.3060302734375, "logps/rejected": -1292.217529296875, "loss": 0.0465, "rewards/accuracies": 0.875, "rewards/chosen": -0.1769389510154724, "rewards/margins": 0.3420455753803253, "rewards/rejected": -0.5189844965934753, "step": 3970 }, { "epoch": 0.76, "learning_rate": 8.395630582527075e-07, "logits/chosen": -1.7763392925262451, "logits/rejected": -0.804221510887146, "logps/chosen": -707.9012451171875, "logps/rejected": -1312.0638427734375, "loss": 0.048, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19189570844173431, "rewards/margins": 0.3184513747692108, "rewards/rejected": -0.5103470683097839, "step": 3980 }, { "epoch": 0.76, "learning_rate": 8.271734841028553e-07, "logits/chosen": -1.5501251220703125, "logits/rejected": -1.0809847116470337, "logps/chosen": -464.4537658691406, "logps/rejected": -1207.356689453125, "loss": 0.0676, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14479857683181763, "rewards/margins": 0.31345051527023315, "rewards/rejected": -0.4582490921020508, "step": 3990 }, { "epoch": 0.76, "learning_rate": 8.148578611867114e-07, "logits/chosen": -1.724912405014038, "logits/rejected": -1.0931422710418701, "logps/chosen": -653.0133056640625, "logps/rejected": -1423.009521484375, "loss": 0.0479, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19157932698726654, "rewards/margins": 0.3370700478553772, "rewards/rejected": -0.5286494493484497, "step": 4000 }, { "epoch": 0.76, "learning_rate": 8.026167339453792e-07, "logits/chosen": -1.731673002243042, "logits/rejected": -1.0135266780853271, "logps/chosen": -637.0040283203125, "logps/rejected": -1375.421875, "loss": 0.071, "rewards/accuracies": 0.875, "rewards/chosen": -0.19047974050045013, "rewards/margins": 0.344374418258667, "rewards/rejected": -0.5348542332649231, "step": 4010 }, { "epoch": 0.77, "learning_rate": 7.904506435266998e-07, "logits/chosen": -1.5759592056274414, "logits/rejected": -1.023024320602417, "logps/chosen": -563.4696044921875, "logps/rejected": -1408.36767578125, "loss": 0.0499, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1717696487903595, "rewards/margins": 0.3687570095062256, "rewards/rejected": -0.5405266880989075, "step": 4020 }, { "epoch": 0.77, "learning_rate": 7.783601277613378e-07, "logits/chosen": -1.4576669931411743, "logits/rejected": -0.9521520733833313, "logps/chosen": -670.4305419921875, "logps/rejected": -1433.7462158203125, "loss": 0.0331, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.18914173543453217, "rewards/margins": 0.38103678822517395, "rewards/rejected": -0.5701784491539001, "step": 4030 }, { "epoch": 0.77, "learning_rate": 7.66345721139003e-07, "logits/chosen": -1.6169652938842773, "logits/rejected": -1.045633316040039, "logps/chosen": -625.4329223632812, "logps/rejected": -1342.4127197265625, "loss": 0.0622, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18822208046913147, "rewards/margins": 0.3339303135871887, "rewards/rejected": -0.5221523642539978, "step": 4040 }, { "epoch": 0.77, "learning_rate": 7.544079547848183e-07, "logits/chosen": -1.6720600128173828, "logits/rejected": -0.9817830324172974, "logps/chosen": -605.169189453125, "logps/rejected": -1312.1810302734375, "loss": 0.0644, "rewards/accuracies": 0.875, "rewards/chosen": -0.16692718863487244, "rewards/margins": 0.32599127292633057, "rewards/rejected": -0.4929184913635254, "step": 4050 }, { "epoch": 0.77, "learning_rate": 7.425473564358457e-07, "logits/chosen": -1.6741440296173096, "logits/rejected": -1.0046089887619019, "logps/chosen": -649.8138427734375, "logps/rejected": -1462.4537353515625, "loss": 0.0276, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.19399216771125793, "rewards/margins": 0.3730100989341736, "rewards/rejected": -0.5670022368431091, "step": 4060 }, { "epoch": 0.78, "learning_rate": 7.307644504177539e-07, "logits/chosen": -1.5330688953399658, "logits/rejected": -0.9372521638870239, "logps/chosen": -686.667236328125, "logps/rejected": -1373.4873046875, "loss": 0.0607, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2182857096195221, "rewards/margins": 0.31643131375312805, "rewards/rejected": -0.5347169637680054, "step": 4070 }, { "epoch": 0.78, "learning_rate": 7.190597576216385e-07, "logits/chosen": -1.4250767230987549, "logits/rejected": -0.8781077265739441, "logps/chosen": -703.5140991210938, "logps/rejected": -1407.7247314453125, "loss": 0.061, "rewards/accuracies": 0.875, "rewards/chosen": -0.242970272898674, "rewards/margins": 0.3056332767009735, "rewards/rejected": -0.5486035943031311, "step": 4080 }, { "epoch": 0.78, "learning_rate": 7.074337954809945e-07, "logits/chosen": -1.5299713611602783, "logits/rejected": -1.0373504161834717, "logps/chosen": -718.3067626953125, "logps/rejected": -1378.4388427734375, "loss": 0.0615, "rewards/accuracies": 0.875, "rewards/chosen": -0.2197229117155075, "rewards/margins": 0.32516324520111084, "rewards/rejected": -0.5448861718177795, "step": 4090 }, { "epoch": 0.78, "learning_rate": 6.958870779488447e-07, "logits/chosen": -1.651827096939087, "logits/rejected": -0.9306151270866394, "logps/chosen": -670.0147705078125, "logps/rejected": -1367.4783935546875, "loss": 0.0726, "rewards/accuracies": 0.875, "rewards/chosen": -0.2136562615633011, "rewards/margins": 0.3230150640010834, "rewards/rejected": -0.5366712808609009, "step": 4100 }, { "epoch": 0.78, "learning_rate": 6.844201154750176e-07, "logits/chosen": -1.382893443107605, "logits/rejected": -0.9818013906478882, "logps/chosen": -585.5609130859375, "logps/rejected": -1219.7158203125, "loss": 0.0758, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20312824845314026, "rewards/margins": 0.28206413984298706, "rewards/rejected": -0.4851924479007721, "step": 4110 }, { "epoch": 0.78, "learning_rate": 6.730334149835788e-07, "logits/chosen": -1.7012789249420166, "logits/rejected": -1.2583403587341309, "logps/chosen": -638.0623779296875, "logps/rejected": -1250.6907958984375, "loss": 0.0732, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19990402460098267, "rewards/margins": 0.31234210729599, "rewards/rejected": -0.5122461915016174, "step": 4120 }, { "epoch": 0.79, "learning_rate": 6.617274798504286e-07, "logits/chosen": -1.6098655462265015, "logits/rejected": -1.2438991069793701, "logps/chosen": -750.6119384765625, "logps/rejected": -1468.4130859375, "loss": 0.0818, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.239259272813797, "rewards/margins": 0.3222685754299164, "rewards/rejected": -0.5615277886390686, "step": 4130 }, { "epoch": 0.79, "learning_rate": 6.505028098810407e-07, "logits/chosen": -1.4979428052902222, "logits/rejected": -0.9974969029426575, "logps/chosen": -664.5301513671875, "logps/rejected": -1350.095947265625, "loss": 0.0471, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.21733012795448303, "rewards/margins": 0.3036603033542633, "rewards/rejected": -0.5209903717041016, "step": 4140 }, { "epoch": 0.79, "learning_rate": 6.393599012883709e-07, "logits/chosen": -1.5405172109603882, "logits/rejected": -1.0617095232009888, "logps/chosen": -608.335205078125, "logps/rejected": -1415.77099609375, "loss": 0.0558, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19087664783000946, "rewards/margins": 0.360538125038147, "rewards/rejected": -0.5514147877693176, "step": 4150 }, { "epoch": 0.79, "learning_rate": 6.282992466709247e-07, "logits/chosen": -1.568500280380249, "logits/rejected": -1.1401845216751099, "logps/chosen": -619.8330078125, "logps/rejected": -1272.417236328125, "loss": 0.0681, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18364539742469788, "rewards/margins": 0.29479947686195374, "rewards/rejected": -0.4784448742866516, "step": 4160 }, { "epoch": 0.79, "learning_rate": 6.17321334990973e-07, "logits/chosen": -1.444135069847107, "logits/rejected": -0.8712388277053833, "logps/chosen": -682.9646606445312, "logps/rejected": -1218.102783203125, "loss": 0.1074, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22160005569458008, "rewards/margins": 0.2552647292613983, "rewards/rejected": -0.47686487436294556, "step": 4170 }, { "epoch": 0.8, "learning_rate": 6.064266515529419e-07, "logits/chosen": -1.6089346408843994, "logits/rejected": -1.148726463317871, "logps/chosen": -718.4061279296875, "logps/rejected": -1356.4761962890625, "loss": 0.0701, "rewards/accuracies": 0.875, "rewards/chosen": -0.23002943396568298, "rewards/margins": 0.29873934388160706, "rewards/rejected": -0.5287687182426453, "step": 4180 }, { "epoch": 0.8, "learning_rate": 5.956156779819586e-07, "logits/chosen": -1.4750916957855225, "logits/rejected": -1.0166985988616943, "logps/chosen": -726.7098388671875, "logps/rejected": -1318.445556640625, "loss": 0.0743, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1983887255191803, "rewards/margins": 0.3115563988685608, "rewards/rejected": -0.5099452137947083, "step": 4190 }, { "epoch": 0.8, "learning_rate": 5.848888922025553e-07, "logits/chosen": -1.6387817859649658, "logits/rejected": -1.0083996057510376, "logps/chosen": -603.5410766601562, "logps/rejected": -1365.437744140625, "loss": 0.0537, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17825394868850708, "rewards/margins": 0.33875900506973267, "rewards/rejected": -0.5170130133628845, "step": 4200 }, { "epoch": 0.8, "learning_rate": 5.742467684175473e-07, "logits/chosen": -1.5205031633377075, "logits/rejected": -0.9753937721252441, "logps/chosen": -701.4137573242188, "logps/rejected": -1442.636962890625, "loss": 0.0652, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2099340856075287, "rewards/margins": 0.3407210111618042, "rewards/rejected": -0.5506550669670105, "step": 4210 }, { "epoch": 0.8, "learning_rate": 5.636897770870667e-07, "logits/chosen": -1.5778207778930664, "logits/rejected": -0.8220159411430359, "logps/chosen": -642.7035522460938, "logps/rejected": -1299.4345703125, "loss": 0.0646, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1969667673110962, "rewards/margins": 0.3171537518501282, "rewards/rejected": -0.5141205191612244, "step": 4220 }, { "epoch": 0.81, "learning_rate": 5.532183849077651e-07, "logits/chosen": -1.692636489868164, "logits/rejected": -1.019176721572876, "logps/chosen": -719.9124755859375, "logps/rejected": -1373.59033203125, "loss": 0.0411, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2058321237564087, "rewards/margins": 0.3241458237171173, "rewards/rejected": -0.5299779176712036, "step": 4230 }, { "epoch": 0.81, "learning_rate": 5.428330547921809e-07, "logits/chosen": -1.5508053302764893, "logits/rejected": -0.8036486506462097, "logps/chosen": -709.2622680664062, "logps/rejected": -1367.7880859375, "loss": 0.0642, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.22711780667304993, "rewards/margins": 0.30738967657089233, "rewards/rejected": -0.5345073938369751, "step": 4240 }, { "epoch": 0.81, "learning_rate": 5.32534245848278e-07, "logits/chosen": -1.4726245403289795, "logits/rejected": -0.9846108555793762, "logps/chosen": -694.281005859375, "logps/rejected": -1363.457275390625, "loss": 0.0661, "rewards/accuracies": 0.875, "rewards/chosen": -0.2253766804933548, "rewards/margins": 0.31212395429611206, "rewards/rejected": -0.5375006794929504, "step": 4250 }, { "epoch": 0.81, "learning_rate": 5.223224133591475e-07, "logits/chosen": -1.591333031654358, "logits/rejected": -0.8938309550285339, "logps/chosen": -677.0285034179688, "logps/rejected": -1240.8616943359375, "loss": 0.0543, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20838508009910583, "rewards/margins": 0.2973417639732361, "rewards/rejected": -0.5057269334793091, "step": 4260 }, { "epoch": 0.81, "learning_rate": 5.121980087628802e-07, "logits/chosen": -1.2787164449691772, "logits/rejected": -0.9021083116531372, "logps/chosen": -727.6304931640625, "logps/rejected": -1371.044921875, "loss": 0.1052, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2336258590221405, "rewards/margins": 0.27272963523864746, "rewards/rejected": -0.5063555240631104, "step": 4270 }, { "epoch": 0.82, "learning_rate": 5.021614796326155e-07, "logits/chosen": -1.711517333984375, "logits/rejected": -1.1945267915725708, "logps/chosen": -593.6410522460938, "logps/rejected": -1357.221435546875, "loss": 0.0416, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.16998571157455444, "rewards/margins": 0.3332723081111908, "rewards/rejected": -0.5032579898834229, "step": 4280 }, { "epoch": 0.82, "learning_rate": 4.922132696567463e-07, "logits/chosen": -1.4213650226593018, "logits/rejected": -0.9432398080825806, "logps/chosen": -738.092529296875, "logps/rejected": -1342.125732421875, "loss": 0.0948, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21361108124256134, "rewards/margins": 0.284687340259552, "rewards/rejected": -0.4982984662055969, "step": 4290 }, { "epoch": 0.82, "learning_rate": 4.823538186193097e-07, "logits/chosen": -1.7533416748046875, "logits/rejected": -1.3407680988311768, "logps/chosen": -615.1173706054688, "logps/rejected": -1254.2808837890625, "loss": 0.0931, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1531359702348709, "rewards/margins": 0.30277323722839355, "rewards/rejected": -0.4559091627597809, "step": 4300 }, { "epoch": 0.82, "learning_rate": 4.725835623805494e-07, "logits/chosen": -1.6482717990875244, "logits/rejected": -1.1470712423324585, "logps/chosen": -706.8258056640625, "logps/rejected": -1328.094482421875, "loss": 0.1006, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.22576546669006348, "rewards/margins": 0.2724280059337616, "rewards/rejected": -0.49819356203079224, "step": 4310 }, { "epoch": 0.82, "learning_rate": 4.6290293285763816e-07, "logits/chosen": -1.7255032062530518, "logits/rejected": -0.9146900177001953, "logps/chosen": -600.8002319335938, "logps/rejected": -1215.993408203125, "loss": 0.0699, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.16509151458740234, "rewards/margins": 0.32071444392204285, "rewards/rejected": -0.4858059287071228, "step": 4320 }, { "epoch": 0.82, "learning_rate": 4.533123580055909e-07, "logits/chosen": -1.6732444763183594, "logits/rejected": -1.1781762838363647, "logps/chosen": -647.499267578125, "logps/rejected": -1235.640380859375, "loss": 0.0888, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17901811003684998, "rewards/margins": 0.27853089570999146, "rewards/rejected": -0.45754894614219666, "step": 4330 }, { "epoch": 0.83, "learning_rate": 4.438122617983442e-07, "logits/chosen": -1.3904229402542114, "logits/rejected": -0.8612979650497437, "logps/chosen": -639.7564697265625, "logps/rejected": -1298.111572265625, "loss": 0.0449, "rewards/accuracies": 0.875, "rewards/chosen": -0.19331908226013184, "rewards/margins": 0.31781238317489624, "rewards/rejected": -0.5111314654350281, "step": 4340 }, { "epoch": 0.83, "learning_rate": 4.344030642100133e-07, "logits/chosen": -1.843650221824646, "logits/rejected": -1.2812918424606323, "logps/chosen": -640.6357421875, "logps/rejected": -1231.58642578125, "loss": 0.0726, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1844300776720047, "rewards/margins": 0.2944430112838745, "rewards/rejected": -0.478873074054718, "step": 4350 }, { "epoch": 0.83, "learning_rate": 4.250851811963236e-07, "logits/chosen": -1.2538540363311768, "logits/rejected": -0.8775166273117065, "logps/chosen": -620.1490478515625, "logps/rejected": -1215.9146728515625, "loss": 0.0662, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20052723586559296, "rewards/margins": 0.27596521377563477, "rewards/rejected": -0.47649240493774414, "step": 4360 }, { "epoch": 0.83, "learning_rate": 4.158590246762278e-07, "logits/chosen": -1.5852545499801636, "logits/rejected": -0.8562048077583313, "logps/chosen": -759.5594482421875, "logps/rejected": -1533.1597900390625, "loss": 0.0545, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2186473309993744, "rewards/margins": 0.3518058657646179, "rewards/rejected": -0.5704531669616699, "step": 4370 }, { "epoch": 0.83, "learning_rate": 4.0672500251369204e-07, "logits/chosen": -1.5913574695587158, "logits/rejected": -0.9126864671707153, "logps/chosen": -683.1362915039062, "logps/rejected": -1301.7376708984375, "loss": 0.0587, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2215549498796463, "rewards/margins": 0.2973330020904541, "rewards/rejected": -0.518887996673584, "step": 4380 }, { "epoch": 0.84, "learning_rate": 3.976835184996644e-07, "logits/chosen": -1.5635426044464111, "logits/rejected": -0.9335036277770996, "logps/chosen": -658.034423828125, "logps/rejected": -1340.171630859375, "loss": 0.0618, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18267235159873962, "rewards/margins": 0.3069957494735718, "rewards/rejected": -0.4896681308746338, "step": 4390 }, { "epoch": 0.84, "learning_rate": 3.887349723342304e-07, "logits/chosen": -1.6750385761260986, "logits/rejected": -1.050916314125061, "logps/chosen": -796.87353515625, "logps/rejected": -1338.190673828125, "loss": 0.0873, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21675412356853485, "rewards/margins": 0.2645486891269684, "rewards/rejected": -0.48130282759666443, "step": 4400 }, { "epoch": 0.84, "learning_rate": 3.798797596089351e-07, "logits/chosen": -1.6474109888076782, "logits/rejected": -1.0717977285385132, "logps/chosen": -796.557861328125, "logps/rejected": -1308.2513427734375, "loss": 0.1033, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24403448402881622, "rewards/margins": 0.25606316328048706, "rewards/rejected": -0.5000976920127869, "step": 4410 }, { "epoch": 0.84, "learning_rate": 3.711182717893011e-07, "logits/chosen": -1.7235870361328125, "logits/rejected": -1.0134425163269043, "logps/chosen": -738.4308471679688, "logps/rejected": -1354.4034423828125, "loss": 0.1004, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21404734253883362, "rewards/margins": 0.2857828736305237, "rewards/rejected": -0.4998301863670349, "step": 4420 }, { "epoch": 0.84, "learning_rate": 3.624508961975215e-07, "logits/chosen": -1.6678346395492554, "logits/rejected": -1.052130937576294, "logps/chosen": -560.8265380859375, "logps/rejected": -1179.17626953125, "loss": 0.0663, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1430796980857849, "rewards/margins": 0.310772180557251, "rewards/rejected": -0.4538518786430359, "step": 4430 }, { "epoch": 0.85, "learning_rate": 3.538780159953348e-07, "logits/chosen": -1.6310476064682007, "logits/rejected": -0.8700817227363586, "logps/chosen": -661.81494140625, "logps/rejected": -1262.3851318359375, "loss": 0.0626, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1743876039981842, "rewards/margins": 0.33026957511901855, "rewards/rejected": -0.5046571493148804, "step": 4440 }, { "epoch": 0.85, "learning_rate": 3.454000101670901e-07, "logits/chosen": -1.432461142539978, "logits/rejected": -0.7634763717651367, "logps/chosen": -572.1975708007812, "logps/rejected": -1243.5718994140625, "loss": 0.0669, "rewards/accuracies": 0.875, "rewards/chosen": -0.1487385481595993, "rewards/margins": 0.3277491331100464, "rewards/rejected": -0.4764877259731293, "step": 4450 }, { "epoch": 0.85, "learning_rate": 3.3701725350299143e-07, "logits/chosen": -1.6553170680999756, "logits/rejected": -0.9621874094009399, "logps/chosen": -617.3048706054688, "logps/rejected": -1175.0587158203125, "loss": 0.0827, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18988697230815887, "rewards/margins": 0.2635541558265686, "rewards/rejected": -0.45344114303588867, "step": 4460 }, { "epoch": 0.85, "learning_rate": 3.2873011658252796e-07, "logits/chosen": -1.712868094444275, "logits/rejected": -0.9782907366752625, "logps/chosen": -598.8307495117188, "logps/rejected": -1285.5618896484375, "loss": 0.052, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1676691621541977, "rewards/margins": 0.33848708868026733, "rewards/rejected": -0.506156325340271, "step": 4470 }, { "epoch": 0.85, "learning_rate": 3.2053896575809426e-07, "logits/chosen": -1.3337204456329346, "logits/rejected": -1.099057674407959, "logps/chosen": -584.3330078125, "logps/rejected": -1244.3043212890625, "loss": 0.0771, "rewards/accuracies": 0.75, "rewards/chosen": -0.18744370341300964, "rewards/margins": 0.28308480978012085, "rewards/rejected": -0.4705285131931305, "step": 4480 }, { "epoch": 0.86, "learning_rate": 3.124441631387931e-07, "logits/chosen": -1.5639264583587646, "logits/rejected": -0.8667289018630981, "logps/chosen": -647.0604248046875, "logps/rejected": -1290.8948974609375, "loss": 0.0658, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1925622522830963, "rewards/margins": 0.3208863437175751, "rewards/rejected": -0.5134485960006714, "step": 4490 }, { "epoch": 0.86, "learning_rate": 3.044460665744284e-07, "logits/chosen": -1.630784034729004, "logits/rejected": -0.9906666874885559, "logps/chosen": -730.5087280273438, "logps/rejected": -1308.167236328125, "loss": 0.1002, "rewards/accuracies": 0.875, "rewards/chosen": -0.2203700840473175, "rewards/margins": 0.29447072744369507, "rewards/rejected": -0.5148407816886902, "step": 4500 }, { "epoch": 0.86, "learning_rate": 2.9654502963968575e-07, "logits/chosen": -1.586451530456543, "logits/rejected": -0.7961150407791138, "logps/chosen": -586.2816162109375, "logps/rejected": -1271.3458251953125, "loss": 0.0616, "rewards/accuracies": 0.875, "rewards/chosen": -0.1716344654560089, "rewards/margins": 0.32398098707199097, "rewards/rejected": -0.49561548233032227, "step": 4510 }, { "epoch": 0.86, "learning_rate": 2.8874140161849915e-07, "logits/chosen": -1.7378228902816772, "logits/rejected": -1.12490975856781, "logps/chosen": -689.92138671875, "logps/rejected": -1360.4993896484375, "loss": 0.0537, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19938451051712036, "rewards/margins": 0.3099740147590637, "rewards/rejected": -0.5093585252761841, "step": 4520 }, { "epoch": 0.86, "learning_rate": 2.810355274886148e-07, "logits/chosen": -1.5160925388336182, "logits/rejected": -0.8361374139785767, "logps/chosen": -664.5655517578125, "logps/rejected": -1321.7562255859375, "loss": 0.0627, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1796727478504181, "rewards/margins": 0.3328721225261688, "rewards/rejected": -0.5125448703765869, "step": 4530 }, { "epoch": 0.86, "learning_rate": 2.7342774790633686e-07, "logits/chosen": -1.582262396812439, "logits/rejected": -1.2324196100234985, "logps/chosen": -573.2413330078125, "logps/rejected": -1255.7186279296875, "loss": 0.0777, "rewards/accuracies": 0.875, "rewards/chosen": -0.17455413937568665, "rewards/margins": 0.30101698637008667, "rewards/rejected": -0.4755710959434509, "step": 4540 }, { "epoch": 0.87, "learning_rate": 2.6591839919146963e-07, "logits/chosen": -1.719139814376831, "logits/rejected": -0.8366669416427612, "logps/chosen": -662.6421508789062, "logps/rejected": -1291.188720703125, "loss": 0.0676, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19452355802059174, "rewards/margins": 0.285775750875473, "rewards/rejected": -0.4802993834018707, "step": 4550 }, { "epoch": 0.87, "learning_rate": 2.58507813312448e-07, "logits/chosen": -1.674212098121643, "logits/rejected": -1.1627938747406006, "logps/chosen": -719.9521484375, "logps/rejected": -1362.1099853515625, "loss": 0.0768, "rewards/accuracies": 0.875, "rewards/chosen": -0.22942790389060974, "rewards/margins": 0.30024927854537964, "rewards/rejected": -0.5296772122383118, "step": 4560 }, { "epoch": 0.87, "learning_rate": 2.511963178716648e-07, "logits/chosen": -1.5046024322509766, "logits/rejected": -1.0431791543960571, "logps/chosen": -589.3131713867188, "logps/rejected": -1238.647705078125, "loss": 0.073, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17821267247200012, "rewards/margins": 0.2867395877838135, "rewards/rejected": -0.464952290058136, "step": 4570 }, { "epoch": 0.87, "learning_rate": 2.439842360909864e-07, "logits/chosen": -1.5564829111099243, "logits/rejected": -1.1388423442840576, "logps/chosen": -673.455810546875, "logps/rejected": -1347.0482177734375, "loss": 0.062, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20791272819042206, "rewards/margins": 0.3196045756340027, "rewards/rejected": -0.5275173187255859, "step": 4580 }, { "epoch": 0.87, "learning_rate": 2.3687188679746314e-07, "logits/chosen": -1.5243313312530518, "logits/rejected": -1.1454176902770996, "logps/chosen": -675.7528686523438, "logps/rejected": -1301.16064453125, "loss": 0.0651, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20887453854084015, "rewards/margins": 0.28879880905151367, "rewards/rejected": -0.497673362493515, "step": 4590 }, { "epoch": 0.88, "learning_rate": 2.2985958440923772e-07, "logits/chosen": -1.8085925579071045, "logits/rejected": -1.1682274341583252, "logps/chosen": -712.1884765625, "logps/rejected": -1192.75732421875, "loss": 0.0929, "rewards/accuracies": 0.75, "rewards/chosen": -0.19347521662712097, "rewards/margins": 0.2465830147266388, "rewards/rejected": -0.44005829095840454, "step": 4600 }, { "epoch": 0.88, "learning_rate": 2.2294763892164284e-07, "logits/chosen": -1.334749698638916, "logits/rejected": -1.004903793334961, "logps/chosen": -605.7439575195312, "logps/rejected": -1201.1614990234375, "loss": 0.0744, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19247183203697205, "rewards/margins": 0.2719135582447052, "rewards/rejected": -0.46438542008399963, "step": 4610 }, { "epoch": 0.88, "learning_rate": 2.1613635589349756e-07, "logits/chosen": -1.6107902526855469, "logits/rejected": -1.1624407768249512, "logps/chosen": -651.1942138671875, "logps/rejected": -1337.1292724609375, "loss": 0.0723, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1878310590982437, "rewards/margins": 0.32857757806777954, "rewards/rejected": -0.5164086222648621, "step": 4620 }, { "epoch": 0.88, "learning_rate": 2.094260364336026e-07, "logits/chosen": -1.7380402088165283, "logits/rejected": -1.0198732614517212, "logps/chosen": -703.918701171875, "logps/rejected": -1186.9219970703125, "loss": 0.0744, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20499920845031738, "rewards/margins": 0.2569231390953064, "rewards/rejected": -0.4619223475456238, "step": 4630 }, { "epoch": 0.88, "learning_rate": 2.0281697718742333e-07, "logits/chosen": -1.5836068391799927, "logits/rejected": -1.1563191413879395, "logps/chosen": -655.0853271484375, "logps/rejected": -1434.5350341796875, "loss": 0.06, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19051814079284668, "rewards/margins": 0.33394360542297363, "rewards/rejected": -0.5244617462158203, "step": 4640 }, { "epoch": 0.89, "learning_rate": 1.9630947032398068e-07, "logits/chosen": -1.7695211172103882, "logits/rejected": -1.111585021018982, "logps/chosen": -688.1593017578125, "logps/rejected": -1326.374267578125, "loss": 0.0681, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2087709903717041, "rewards/margins": 0.29607993364334106, "rewards/rejected": -0.5048509240150452, "step": 4650 }, { "epoch": 0.89, "learning_rate": 1.899038035229342e-07, "logits/chosen": -1.3365787267684937, "logits/rejected": -0.951318621635437, "logps/chosen": -495.21746826171875, "logps/rejected": -1190.7960205078125, "loss": 0.064, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15549428761005402, "rewards/margins": 0.30532047152519226, "rewards/rejected": -0.46081477403640747, "step": 4660 }, { "epoch": 0.89, "learning_rate": 1.8360025996186138e-07, "logits/chosen": -1.6793752908706665, "logits/rejected": -1.2577649354934692, "logps/chosen": -654.9149169921875, "logps/rejected": -1316.4361572265625, "loss": 0.0554, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.18013811111450195, "rewards/margins": 0.31460148096084595, "rewards/rejected": -0.4947396218776703, "step": 4670 }, { "epoch": 0.89, "learning_rate": 1.7739911830374352e-07, "logits/chosen": -1.44693922996521, "logits/rejected": -0.7544930577278137, "logps/chosen": -736.6383056640625, "logps/rejected": -1203.6229248046875, "loss": 0.0792, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2087833434343338, "rewards/margins": 0.2550183832645416, "rewards/rejected": -0.463801771402359, "step": 4680 }, { "epoch": 0.89, "learning_rate": 1.713006526846439e-07, "logits/chosen": -1.4717142581939697, "logits/rejected": -0.8292557597160339, "logps/chosen": -659.34033203125, "logps/rejected": -1225.5867919921875, "loss": 0.0778, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18005402386188507, "rewards/margins": 0.2781462073326111, "rewards/rejected": -0.45820027589797974, "step": 4690 }, { "epoch": 0.9, "learning_rate": 1.6530513270159116e-07, "logits/chosen": -1.7716267108917236, "logits/rejected": -1.1000728607177734, "logps/chosen": -632.2623291015625, "logps/rejected": -1293.8427734375, "loss": 0.0712, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17110465466976166, "rewards/margins": 0.3263039290904999, "rewards/rejected": -0.49740856885910034, "step": 4700 }, { "epoch": 0.9, "learning_rate": 1.59412823400657e-07, "logits/chosen": -1.740073800086975, "logits/rejected": -0.9315148591995239, "logps/chosen": -628.2022705078125, "logps/rejected": -1271.313232421875, "loss": 0.0656, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18129108846187592, "rewards/margins": 0.31177636981010437, "rewards/rejected": -0.4930674433708191, "step": 4710 }, { "epoch": 0.9, "learning_rate": 1.5362398526524463e-07, "logits/chosen": -1.902126669883728, "logits/rejected": -1.1279147863388062, "logps/chosen": -647.0514526367188, "logps/rejected": -1307.1373291015625, "loss": 0.0724, "rewards/accuracies": 0.875, "rewards/chosen": -0.16059984266757965, "rewards/margins": 0.32505813241004944, "rewards/rejected": -0.4856579899787903, "step": 4720 }, { "epoch": 0.9, "learning_rate": 1.4793887420457008e-07, "logits/chosen": -1.327126383781433, "logits/rejected": -0.998917281627655, "logps/chosen": -664.4324951171875, "logps/rejected": -1342.4854736328125, "loss": 0.0733, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.23212707042694092, "rewards/margins": 0.2786712944507599, "rewards/rejected": -0.5107983350753784, "step": 4730 }, { "epoch": 0.9, "learning_rate": 1.4235774154234855e-07, "logits/chosen": -1.6532478332519531, "logits/rejected": -0.874586284160614, "logps/chosen": -730.2886962890625, "logps/rejected": -1362.4957275390625, "loss": 0.0468, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.21704640984535217, "rewards/margins": 0.31448471546173096, "rewards/rejected": -0.5315311551094055, "step": 4740 }, { "epoch": 0.9, "learning_rate": 1.368808340056879e-07, "logits/chosen": -1.6743333339691162, "logits/rejected": -1.163001298904419, "logps/chosen": -662.5440673828125, "logps/rejected": -1255.9044189453125, "loss": 0.0612, "rewards/accuracies": 0.875, "rewards/chosen": -0.21114924550056458, "rewards/margins": 0.2891456186771393, "rewards/rejected": -0.5002948641777039, "step": 4750 }, { "epoch": 0.91, "learning_rate": 1.31508393714177e-07, "logits/chosen": -1.724111557006836, "logits/rejected": -1.1406222581863403, "logps/chosen": -682.9302978515625, "logps/rejected": -1390.936279296875, "loss": 0.0674, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18357078731060028, "rewards/margins": 0.3388100266456604, "rewards/rejected": -0.5223808288574219, "step": 4760 }, { "epoch": 0.91, "learning_rate": 1.2624065816918414e-07, "logits/chosen": -1.5875694751739502, "logits/rejected": -0.9316909909248352, "logps/chosen": -637.0687255859375, "logps/rejected": -1340.5999755859375, "loss": 0.0489, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.20680876076221466, "rewards/margins": 0.33463913202285767, "rewards/rejected": -0.5414477586746216, "step": 4770 }, { "epoch": 0.91, "learning_rate": 1.210778602433596e-07, "logits/chosen": -1.452817678451538, "logits/rejected": -1.0641636848449707, "logps/chosen": -696.5808715820312, "logps/rejected": -1312.058837890625, "loss": 0.0763, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21340656280517578, "rewards/margins": 0.2833073139190674, "rewards/rejected": -0.49671393632888794, "step": 4780 }, { "epoch": 0.91, "learning_rate": 1.1602022817033709e-07, "logits/chosen": -1.572145700454712, "logits/rejected": -1.0893280506134033, "logps/chosen": -582.0816040039062, "logps/rejected": -1350.13427734375, "loss": 0.0584, "rewards/accuracies": 0.875, "rewards/chosen": -0.1654617041349411, "rewards/margins": 0.3677004277706146, "rewards/rejected": -0.5331621766090393, "step": 4790 }, { "epoch": 0.91, "learning_rate": 1.1106798553464804e-07, "logits/chosen": -1.7322986125946045, "logits/rejected": -1.2693445682525635, "logps/chosen": -668.9528198242188, "logps/rejected": -1356.50634765625, "loss": 0.0663, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19448697566986084, "rewards/margins": 0.31413862109184265, "rewards/rejected": -0.5086256265640259, "step": 4800 }, { "epoch": 0.92, "learning_rate": 1.0622135126183514e-07, "logits/chosen": -1.541881799697876, "logits/rejected": -1.1039928197860718, "logps/chosen": -730.3561401367188, "logps/rejected": -1410.5531005859375, "loss": 0.0625, "rewards/accuracies": 0.875, "rewards/chosen": -0.2166539430618286, "rewards/margins": 0.327588826417923, "rewards/rejected": -0.5442427396774292, "step": 4810 }, { "epoch": 0.92, "learning_rate": 1.0148053960877396e-07, "logits/chosen": -1.627398133277893, "logits/rejected": -1.1000397205352783, "logps/chosen": -591.6277465820312, "logps/rejected": -1181.8179931640625, "loss": 0.085, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18314941227436066, "rewards/margins": 0.2747834324836731, "rewards/rejected": -0.45793280005455017, "step": 4820 }, { "epoch": 0.92, "learning_rate": 9.684576015420277e-08, "logits/chosen": -1.6205629110336304, "logits/rejected": -0.995998740196228, "logps/chosen": -659.4131469726562, "logps/rejected": -1200.470947265625, "loss": 0.0717, "rewards/accuracies": 0.875, "rewards/chosen": -0.19112655520439148, "rewards/margins": 0.2899617552757263, "rewards/rejected": -0.4810883402824402, "step": 4830 }, { "epoch": 0.92, "learning_rate": 9.23172177894574e-08, "logits/chosen": -1.6110188961029053, "logits/rejected": -1.0364059209823608, "logps/chosen": -547.3153076171875, "logps/rejected": -1314.8369140625, "loss": 0.054, "rewards/accuracies": 0.875, "rewards/chosen": -0.1694483757019043, "rewards/margins": 0.34166327118873596, "rewards/rejected": -0.5111116170883179, "step": 4840 }, { "epoch": 0.92, "learning_rate": 8.78951127094127e-08, "logits/chosen": -1.5068880319595337, "logits/rejected": -0.9471427798271179, "logps/chosen": -521.8685302734375, "logps/rejected": -1296.308349609375, "loss": 0.0578, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13810133934020996, "rewards/margins": 0.3386577069759369, "rewards/rejected": -0.47675904631614685, "step": 4850 }, { "epoch": 0.93, "learning_rate": 8.357964040363209e-08, "logits/chosen": -1.290801763534546, "logits/rejected": -0.9146105647087097, "logps/chosen": -596.8779296875, "logps/rejected": -1347.836181640625, "loss": 0.0449, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20246358215808868, "rewards/margins": 0.33046966791152954, "rewards/rejected": -0.5329331755638123, "step": 4860 }, { "epoch": 0.93, "learning_rate": 7.937099164772699e-08, "logits/chosen": -1.7896692752838135, "logits/rejected": -1.0212453603744507, "logps/chosen": -690.1158447265625, "logps/rejected": -1228.534912109375, "loss": 0.0814, "rewards/accuracies": 0.875, "rewards/chosen": -0.1973530352115631, "rewards/margins": 0.2856197655200958, "rewards/rejected": -0.48297280073165894, "step": 4870 }, { "epoch": 0.93, "learning_rate": 7.526935249492245e-08, "logits/chosen": -1.679908037185669, "logits/rejected": -1.176174283027649, "logps/chosen": -579.732421875, "logps/rejected": -1254.651611328125, "loss": 0.0666, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1832021027803421, "rewards/margins": 0.2981014847755432, "rewards/rejected": -0.4813036024570465, "step": 4880 }, { "epoch": 0.93, "learning_rate": 7.127490426783124e-08, "logits/chosen": -1.7154324054718018, "logits/rejected": -1.0123614072799683, "logps/chosen": -632.9345703125, "logps/rejected": -1300.0052490234375, "loss": 0.0656, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19814178347587585, "rewards/margins": 0.32001256942749023, "rewards/rejected": -0.5181543231010437, "step": 4890 }, { "epoch": 0.93, "learning_rate": 6.738782355044048e-08, "logits/chosen": -1.5465729236602783, "logits/rejected": -0.9724661707878113, "logps/chosen": -593.3762817382812, "logps/rejected": -1277.333251953125, "loss": 0.0448, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17785096168518066, "rewards/margins": 0.32004857063293457, "rewards/rejected": -0.49789953231811523, "step": 4900 }, { "epoch": 0.94, "learning_rate": 6.360828218030191e-08, "logits/chosen": -1.4088236093521118, "logits/rejected": -0.7327739000320435, "logps/chosen": -690.17626953125, "logps/rejected": -1354.6500244140625, "loss": 0.0651, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20846012234687805, "rewards/margins": 0.302595853805542, "rewards/rejected": -0.5110559463500977, "step": 4910 }, { "epoch": 0.94, "learning_rate": 5.993644724093889e-08, "logits/chosen": -1.6966909170150757, "logits/rejected": -0.9541479349136353, "logps/chosen": -614.1896362304688, "logps/rejected": -1325.9324951171875, "loss": 0.0657, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.183970108628273, "rewards/margins": 0.33520275354385376, "rewards/rejected": -0.5191728472709656, "step": 4920 }, { "epoch": 0.94, "learning_rate": 5.637248105445775e-08, "logits/chosen": -1.7550580501556396, "logits/rejected": -0.9308683276176453, "logps/chosen": -793.6554565429688, "logps/rejected": -1485.291259765625, "loss": 0.0541, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.23346392810344696, "rewards/margins": 0.3398323655128479, "rewards/rejected": -0.573296308517456, "step": 4930 }, { "epoch": 0.94, "learning_rate": 5.291654117437262e-08, "logits/chosen": -1.7816959619522095, "logits/rejected": -1.0950982570648193, "logps/chosen": -879.9280395507812, "logps/rejected": -1422.867431640625, "loss": 0.077, "rewards/accuracies": 0.875, "rewards/chosen": -0.21035346388816833, "rewards/margins": 0.30386513471603394, "rewards/rejected": -0.5142186284065247, "step": 4940 }, { "epoch": 0.94, "learning_rate": 4.956878037864044e-08, "logits/chosen": -1.4438133239746094, "logits/rejected": -0.9569295048713684, "logps/chosen": -665.3748168945312, "logps/rejected": -1342.6187744140625, "loss": 0.0789, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19597147405147552, "rewards/margins": 0.32199811935424805, "rewards/rejected": -0.51796954870224, "step": 4950 }, { "epoch": 0.94, "learning_rate": 4.632934666290778e-08, "logits/chosen": -1.6642974615097046, "logits/rejected": -0.914962649345398, "logps/chosen": -649.2335815429688, "logps/rejected": -1294.827392578125, "loss": 0.0438, "rewards/accuracies": 0.875, "rewards/chosen": -0.1978123039007187, "rewards/margins": 0.32232779264450073, "rewards/rejected": -0.5201401114463806, "step": 4960 }, { "epoch": 0.95, "learning_rate": 4.319838323396691e-08, "logits/chosen": -2.037479877471924, "logits/rejected": -0.9676597714424133, "logps/chosen": -689.044921875, "logps/rejected": -1347.722900390625, "loss": 0.0454, "rewards/accuracies": 0.875, "rewards/chosen": -0.1525397002696991, "rewards/margins": 0.3588055968284607, "rewards/rejected": -0.5113453269004822, "step": 4970 }, { "epoch": 0.95, "learning_rate": 4.017602850342584e-08, "logits/chosen": -1.6456416845321655, "logits/rejected": -1.1140178442001343, "logps/chosen": -595.4800415039062, "logps/rejected": -1429.136962890625, "loss": 0.0368, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1686953455209732, "rewards/margins": 0.37380266189575195, "rewards/rejected": -0.542497992515564, "step": 4980 }, { "epoch": 0.95, "learning_rate": 3.7262416081589866e-08, "logits/chosen": -1.7311760187149048, "logits/rejected": -0.8864547610282898, "logps/chosen": -650.812744140625, "logps/rejected": -1239.8447265625, "loss": 0.0841, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17808124423027039, "rewards/margins": 0.33711546659469604, "rewards/rejected": -0.5151967406272888, "step": 4990 }, { "epoch": 0.95, "learning_rate": 3.445767477155443e-08, "logits/chosen": -1.6467472314834595, "logits/rejected": -0.9199285507202148, "logps/chosen": -666.3021240234375, "logps/rejected": -1309.499755859375, "loss": 0.0378, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.17945322394371033, "rewards/margins": 0.33249643445014954, "rewards/rejected": -0.5119495987892151, "step": 5000 }, { "epoch": 0.95, "learning_rate": 3.1761928563510956e-08, "logits/chosen": -1.5655813217163086, "logits/rejected": -0.9106910824775696, "logps/chosen": -629.9158935546875, "logps/rejected": -1288.5303955078125, "loss": 0.0589, "rewards/accuracies": 0.875, "rewards/chosen": -0.18516629934310913, "rewards/margins": 0.32465219497680664, "rewards/rejected": -0.5098185539245605, "step": 5010 }, { "epoch": 0.96, "learning_rate": 2.917529662926549e-08, "logits/chosen": -1.6372337341308594, "logits/rejected": -1.0175507068634033, "logps/chosen": -697.6114501953125, "logps/rejected": -1387.153076171875, "loss": 0.0622, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.22449293732643127, "rewards/margins": 0.31668001413345337, "rewards/rejected": -0.5411729216575623, "step": 5020 }, { "epoch": 0.96, "learning_rate": 2.669789331697148e-08, "logits/chosen": -1.5427504777908325, "logits/rejected": -1.123302698135376, "logps/chosen": -681.3251953125, "logps/rejected": -1218.051513671875, "loss": 0.0818, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1837557703256607, "rewards/margins": 0.2799621820449829, "rewards/rejected": -0.4637179970741272, "step": 5030 }, { "epoch": 0.96, "learning_rate": 2.4329828146074096e-08, "logits/chosen": -1.7077605724334717, "logits/rejected": -0.9767044186592102, "logps/chosen": -599.540771484375, "logps/rejected": -1115.1014404296875, "loss": 0.0819, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1757069081068039, "rewards/margins": 0.2653740346431732, "rewards/rejected": -0.44108089804649353, "step": 5040 }, { "epoch": 0.96, "learning_rate": 2.20712058024683e-08, "logits/chosen": -1.5907446146011353, "logits/rejected": -1.2127859592437744, "logps/chosen": -699.6365966796875, "logps/rejected": -1363.490966796875, "loss": 0.0845, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21838286519050598, "rewards/margins": 0.3182257413864136, "rewards/rejected": -0.5366085767745972, "step": 5050 }, { "epoch": 0.96, "learning_rate": 1.9922126133870568e-08, "logits/chosen": -1.7688674926757812, "logits/rejected": -1.2168480157852173, "logps/chosen": -596.4293823242188, "logps/rejected": -1142.8013916015625, "loss": 0.1057, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1690121740102768, "rewards/margins": 0.2577545940876007, "rewards/rejected": -0.4267667829990387, "step": 5060 }, { "epoch": 0.97, "learning_rate": 1.7882684145406616e-08, "logits/chosen": -1.5492855310440063, "logits/rejected": -0.8371337652206421, "logps/chosen": -619.2363891601562, "logps/rejected": -1319.695556640625, "loss": 0.0486, "rewards/accuracies": 0.875, "rewards/chosen": -0.18321385979652405, "rewards/margins": 0.3207260072231293, "rewards/rejected": -0.5039398074150085, "step": 5070 }, { "epoch": 0.97, "learning_rate": 1.595296999541057e-08, "logits/chosen": -1.579831838607788, "logits/rejected": -1.1370495557785034, "logps/chosen": -599.0010986328125, "logps/rejected": -1130.3555908203125, "loss": 0.0703, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17692309617996216, "rewards/margins": 0.2634755074977875, "rewards/rejected": -0.44039860367774963, "step": 5080 }, { "epoch": 0.97, "learning_rate": 1.4133068991437903e-08, "logits/chosen": -1.587927222251892, "logits/rejected": -0.8456587791442871, "logps/chosen": -758.2893676757812, "logps/rejected": -1242.40869140625, "loss": 0.0787, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20422771573066711, "rewards/margins": 0.28214433789253235, "rewards/rejected": -0.4863719940185547, "step": 5090 }, { "epoch": 0.97, "learning_rate": 1.2423061586496476e-08, "logits/chosen": -1.3969306945800781, "logits/rejected": -1.0442532300949097, "logps/chosen": -612.6021728515625, "logps/rejected": -1355.794677734375, "loss": 0.0744, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18990156054496765, "rewards/margins": 0.3172561228275299, "rewards/rejected": -0.5071576833724976, "step": 5100 }, { "epoch": 0.97, "learning_rate": 1.0823023375489128e-08, "logits/chosen": -1.7758409976959229, "logits/rejected": -1.0913546085357666, "logps/chosen": -643.1871948242188, "logps/rejected": -1352.025634765625, "loss": 0.0547, "rewards/accuracies": 0.875, "rewards/chosen": -0.1932803839445114, "rewards/margins": 0.3292398452758789, "rewards/rejected": -0.5225202441215515, "step": 5110 }, { "epoch": 0.98, "learning_rate": 9.333025091870507e-09, "logits/chosen": -1.6138235330581665, "logits/rejected": -1.1149636507034302, "logps/chosen": -720.6397705078125, "logps/rejected": -1290.0531005859375, "loss": 0.098, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23546946048736572, "rewards/margins": 0.2658223509788513, "rewards/rejected": -0.5012918710708618, "step": 5120 }, { "epoch": 0.98, "learning_rate": 7.95313260452263e-09, "logits/chosen": -1.4443891048431396, "logits/rejected": -0.9066373705863953, "logps/chosen": -705.2442626953125, "logps/rejected": -1347.6341552734375, "loss": 0.0772, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21808597445487976, "rewards/margins": 0.3010413348674774, "rewards/rejected": -0.5191273093223572, "step": 5130 }, { "epoch": 0.98, "learning_rate": 6.683406914840818e-09, "logits/chosen": -1.839503288269043, "logits/rejected": -0.9399774670600891, "logps/chosen": -724.562255859375, "logps/rejected": -1332.5740966796875, "loss": 0.0646, "rewards/accuracies": 0.875, "rewards/chosen": -0.20473381876945496, "rewards/margins": 0.34322500228881836, "rewards/rejected": -0.5479588508605957, "step": 5140 }, { "epoch": 0.98, "learning_rate": 5.523904154037529e-09, "logits/chosen": -1.5230966806411743, "logits/rejected": -1.1400573253631592, "logps/chosen": -695.667724609375, "logps/rejected": -1319.90576171875, "loss": 0.088, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2105880081653595, "rewards/margins": 0.27609017491340637, "rewards/rejected": -0.4866781234741211, "step": 5150 }, { "epoch": 0.98, "learning_rate": 4.474675580662113e-09, "logits/chosen": -1.6967852115631104, "logits/rejected": -0.9574136734008789, "logps/chosen": -639.5457153320312, "logps/rejected": -1343.309326171875, "loss": 0.0461, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.17780372500419617, "rewards/margins": 0.334329754114151, "rewards/rejected": -0.5121334195137024, "step": 5160 }, { "epoch": 0.98, "learning_rate": 3.5357675783331823e-09, "logits/chosen": -1.5961401462554932, "logits/rejected": -1.085395336151123, "logps/chosen": -672.110595703125, "logps/rejected": -1258.385986328125, "loss": 0.0927, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2102072536945343, "rewards/margins": 0.27410417795181274, "rewards/rejected": -0.48431143164634705, "step": 5170 }, { "epoch": 0.99, "learning_rate": 2.7072216536885855e-09, "logits/chosen": -1.572046160697937, "logits/rejected": -1.1685867309570312, "logps/chosen": -627.5899658203125, "logps/rejected": -1391.069091796875, "loss": 0.0609, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18392732739448547, "rewards/margins": 0.3593160808086395, "rewards/rejected": -0.543243408203125, "step": 5180 }, { "epoch": 0.99, "learning_rate": 1.989074434551874e-09, "logits/chosen": -1.3752251863479614, "logits/rejected": -1.040238618850708, "logps/chosen": -636.2149658203125, "logps/rejected": -1396.5648193359375, "loss": 0.0509, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2024511843919754, "rewards/margins": 0.3306366801261902, "rewards/rejected": -0.5330878496170044, "step": 5190 }, { "epoch": 0.99, "learning_rate": 1.3813576683111007e-09, "logits/chosen": -1.7658580541610718, "logits/rejected": -0.8461794853210449, "logps/chosen": -736.367431640625, "logps/rejected": -1369.6800537109375, "loss": 0.0579, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.19013865292072296, "rewards/margins": 0.3445832431316376, "rewards/rejected": -0.5347219109535217, "step": 5200 }, { "epoch": 0.99, "learning_rate": 8.840982205160498e-10, "logits/chosen": -1.1934562921524048, "logits/rejected": -0.7583077549934387, "logps/chosen": -643.0595092773438, "logps/rejected": -1491.562744140625, "loss": 0.072, "rewards/accuracies": 0.875, "rewards/chosen": -0.2331688404083252, "rewards/margins": 0.3358479142189026, "rewards/rejected": -0.5690167546272278, "step": 5210 }, { "epoch": 0.99, "learning_rate": 4.973180736911332e-10, "logits/chosen": -1.7001054286956787, "logits/rejected": -1.0497257709503174, "logps/chosen": -734.0220336914062, "logps/rejected": -1379.366943359375, "loss": 0.0586, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19413700699806213, "rewards/margins": 0.33139941096305847, "rewards/rejected": -0.5255364179611206, "step": 5220 }, { "epoch": 1.0, "learning_rate": 2.2103432636366718e-10, "logits/chosen": -1.510568380355835, "logits/rejected": -1.0220980644226074, "logps/chosen": -764.5279541015625, "logps/rejected": -1201.794189453125, "loss": 0.0778, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1854855716228485, "rewards/margins": 0.24305124580860138, "rewards/rejected": -0.4285368025302887, "step": 5230 }, { "epoch": 1.0, "learning_rate": 5.525919230670029e-11, "logits/chosen": -1.5603317022323608, "logits/rejected": -1.0289227962493896, "logps/chosen": -833.1355590820312, "logps/rejected": -1442.788818359375, "loss": 0.0496, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.24440082907676697, "rewards/margins": 0.3070646822452545, "rewards/rejected": -0.5514655113220215, "step": 5240 }, { "epoch": 1.0, "learning_rate": 0.0, "logits/chosen": -1.7747621536254883, "logits/rejected": -1.0173823833465576, "logps/chosen": -691.9779663085938, "logps/rejected": -1244.77294921875, "loss": 0.0642, "rewards/accuracies": 0.875, "rewards/chosen": -0.19418226182460785, "rewards/margins": 0.31588393449783325, "rewards/rejected": -0.5100662708282471, "step": 5250 }, { "epoch": 1.0, "step": 5250, "total_flos": 0.0, "train_loss": 0.07741349755014693, "train_runtime": 22357.7334, "train_samples_per_second": 0.939, "train_steps_per_second": 0.235 } ], "logging_steps": 10, "max_steps": 5250, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }