{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 1659, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2e-09, "logits/chosen": -2.3448917865753174, "logits/rejected": -2.34983229637146, "logps/chosen": -6.861311912536621, "logps/rejected": -6.602255821228027, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 4e-09, "logits/chosen": -2.353008985519409, "logits/rejected": -2.3557820320129395, "logps/chosen": -6.817426681518555, "logps/rejected": -18.61421012878418, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 2 }, { "epoch": 0.01, "learning_rate": 5.999999999999999e-09, "logits/chosen": -2.4017298221588135, "logits/rejected": -2.396986722946167, "logps/chosen": -9.083703994750977, "logps/rejected": -19.469057083129883, "loss": 0.6938, "rewards/accuracies": 0.0, "rewards/chosen": 0.005378150846809149, "rewards/margins": -0.0006160736083984375, "rewards/rejected": 0.005994224455207586, "step": 3 }, { "epoch": 0.01, "learning_rate": 8e-09, "logits/chosen": -2.223820447921753, "logits/rejected": -2.2239036560058594, "logps/chosen": -9.441778182983398, "logps/rejected": -7.114092826843262, "loss": 0.6935, "rewards/accuracies": 1.0, "rewards/chosen": -0.010395431891083717, "rewards/margins": 0.001256084069609642, "rewards/rejected": -0.01165151596069336, "step": 4 }, { "epoch": 0.01, "learning_rate": 1e-08, "logits/chosen": -2.315678119659424, "logits/rejected": -2.310950994491577, "logps/chosen": -13.300193786621094, "logps/rejected": -8.320836067199707, "loss": 0.6971, "rewards/accuracies": 0.0, "rewards/chosen": -0.003943061921745539, "rewards/margins": -0.006727027706801891, "rewards/rejected": 0.0027839660178869963, "step": 5 }, { "epoch": 0.01, "learning_rate": 1.1999999999999998e-08, "logits/chosen": -2.475206136703491, "logits/rejected": -2.5870144367218018, "logps/chosen": -10.005387306213379, "logps/rejected": -36.98533248901367, "loss": 0.6911, "rewards/accuracies": 1.0, "rewards/chosen": -0.009616089053452015, "rewards/margins": 0.017036817967891693, "rewards/rejected": -0.026652907952666283, "step": 6 }, { "epoch": 0.01, "learning_rate": 1.4000000000000001e-08, "logits/chosen": -2.326702356338501, "logits/rejected": -2.3244097232818604, "logps/chosen": -8.750334739685059, "logps/rejected": -9.334360122680664, "loss": 0.6907, "rewards/accuracies": 1.0, "rewards/chosen": 0.0003682136593852192, "rewards/margins": 0.008605098351836205, "rewards/rejected": -0.008236885070800781, "step": 7 }, { "epoch": 0.01, "learning_rate": 1.6e-08, "logits/chosen": -2.3906409740448, "logits/rejected": -2.3934249877929688, "logps/chosen": -12.279275894165039, "logps/rejected": -10.15798568725586, "loss": 0.6881, "rewards/accuracies": 1.0, "rewards/chosen": 0.008382129482924938, "rewards/margins": 0.004579353146255016, "rewards/rejected": 0.003802776336669922, "step": 8 }, { "epoch": 0.02, "learning_rate": 1.8e-08, "logits/chosen": -2.4664182662963867, "logits/rejected": -2.463045835494995, "logps/chosen": -5.896607875823975, "logps/rejected": -8.374044418334961, "loss": 0.6938, "rewards/accuracies": 0.0, "rewards/chosen": 0.009452009573578835, "rewards/margins": -0.00345530454069376, "rewards/rejected": 0.012907314114272594, "step": 9 }, { "epoch": 0.02, "learning_rate": 2e-08, "logits/chosen": -2.3923041820526123, "logits/rejected": -2.386066198348999, "logps/chosen": -6.820180892944336, "logps/rejected": -10.08674144744873, "loss": 0.6952, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016377449501305819, "rewards/margins": 0.009367752820253372, "rewards/rejected": -0.01100549753755331, "step": 10 }, { "epoch": 0.02, "learning_rate": 2.2e-08, "logits/chosen": -2.4164581298828125, "logits/rejected": -2.4160656929016113, "logps/chosen": -12.643186569213867, "logps/rejected": -17.869171142578125, "loss": 0.7003, "rewards/accuracies": 0.0, "rewards/chosen": -1.5354156857938506e-05, "rewards/margins": -0.024797916412353516, "rewards/rejected": 0.024782562628388405, "step": 11 }, { "epoch": 0.02, "learning_rate": 2.3999999999999997e-08, "logits/chosen": -2.43426251411438, "logits/rejected": -2.4387967586517334, "logps/chosen": -10.887414932250977, "logps/rejected": -9.57721996307373, "loss": 0.6908, "rewards/accuracies": 1.0, "rewards/chosen": 0.018018150702118874, "rewards/margins": 0.008442115969955921, "rewards/rejected": 0.009576034732162952, "step": 12 }, { "epoch": 0.02, "learning_rate": 2.6e-08, "logits/chosen": -2.4211745262145996, "logits/rejected": -2.418473482131958, "logps/chosen": -16.296201705932617, "logps/rejected": -11.748044967651367, "loss": 0.6875, "rewards/accuracies": 0.0, "rewards/chosen": -0.00200653076171875, "rewards/margins": -0.0027687072288244963, "rewards/rejected": 0.0007621765253134072, "step": 13 }, { "epoch": 0.03, "learning_rate": 2.8000000000000003e-08, "logits/chosen": -2.3615562915802, "logits/rejected": -2.4434032440185547, "logps/chosen": -7.880118370056152, "logps/rejected": -41.931968688964844, "loss": 0.6936, "rewards/accuracies": 0.0, "rewards/chosen": 0.003388309618458152, "rewards/margins": -0.0012824058067053556, "rewards/rejected": 0.0046707154251635075, "step": 14 }, { "epoch": 0.03, "learning_rate": 3e-08, "logits/chosen": -2.3982343673706055, "logits/rejected": -2.3913323879241943, "logps/chosen": -8.542461395263672, "logps/rejected": -8.838611602783203, "loss": 0.6926, "rewards/accuracies": 1.0, "rewards/chosen": 0.014222717843949795, "rewards/margins": 0.0007259370759129524, "rewards/rejected": 0.013496780768036842, "step": 15 }, { "epoch": 0.03, "learning_rate": 3.2e-08, "logits/chosen": -2.5639572143554688, "logits/rejected": -2.5610921382904053, "logps/chosen": -17.33804702758789, "logps/rejected": -9.097455978393555, "loss": 0.6968, "rewards/accuracies": 0.0, "rewards/chosen": -0.0025140761863440275, "rewards/margins": -0.014720344915986061, "rewards/rejected": 0.01220626849681139, "step": 16 }, { "epoch": 0.03, "learning_rate": 3.4e-08, "logits/chosen": -2.4290785789489746, "logits/rejected": -2.414397954940796, "logps/chosen": -21.63210678100586, "logps/rejected": -6.984490871429443, "loss": 0.6887, "rewards/accuracies": 1.0, "rewards/chosen": 0.01378479041159153, "rewards/margins": 0.016922427341341972, "rewards/rejected": -0.0031376362312585115, "step": 17 }, { "epoch": 0.03, "learning_rate": 3.6e-08, "logits/chosen": -2.3402621746063232, "logits/rejected": -2.3498151302337646, "logps/chosen": -9.639814376831055, "logps/rejected": -9.63769245147705, "loss": 0.6927, "rewards/accuracies": 1.0, "rewards/chosen": 0.0056022643111646175, "rewards/margins": 0.006548118311911821, "rewards/rejected": -0.0009458541753701866, "step": 18 }, { "epoch": 0.03, "learning_rate": 3.7999999999999996e-08, "logits/chosen": -2.3796284198760986, "logits/rejected": -2.374650001525879, "logps/chosen": -15.205751419067383, "logps/rejected": -7.429357528686523, "loss": 0.6917, "rewards/accuracies": 0.0, "rewards/chosen": -0.014727211557328701, "rewards/margins": -0.015634680166840553, "rewards/rejected": 0.000907468784134835, "step": 19 }, { "epoch": 0.04, "learning_rate": 4e-08, "logits/chosen": -2.372921943664551, "logits/rejected": -2.3795230388641357, "logps/chosen": -5.911515235900879, "logps/rejected": -6.045673847198486, "loss": 0.6901, "rewards/accuracies": 1.0, "rewards/chosen": 0.005972957704216242, "rewards/margins": 0.008354520425200462, "rewards/rejected": -0.002381563186645508, "step": 20 }, { "epoch": 0.04, "learning_rate": 4.2e-08, "logits/chosen": -2.402557134628296, "logits/rejected": -2.4021670818328857, "logps/chosen": -16.91586685180664, "logps/rejected": -7.327460289001465, "loss": 0.6961, "rewards/accuracies": 1.0, "rewards/chosen": 0.00581703195348382, "rewards/margins": 0.006658220198005438, "rewards/rejected": -0.0008411884191446006, "step": 21 }, { "epoch": 0.04, "learning_rate": 4.4e-08, "logits/chosen": -2.3985073566436768, "logits/rejected": -2.3979105949401855, "logps/chosen": -5.86591100692749, "logps/rejected": -7.075325012207031, "loss": 0.6923, "rewards/accuracies": 1.0, "rewards/chosen": -0.00513272313401103, "rewards/margins": 0.013548040762543678, "rewards/rejected": -0.018680764362215996, "step": 22 }, { "epoch": 0.04, "learning_rate": 4.6e-08, "logits/chosen": -2.299051284790039, "logits/rejected": -2.301396369934082, "logps/chosen": -10.524219512939453, "logps/rejected": -12.241168022155762, "loss": 0.6939, "rewards/accuracies": 0.0, "rewards/chosen": -0.0005468368763104081, "rewards/margins": -0.006119346711784601, "rewards/rejected": 0.005572509951889515, "step": 23 }, { "epoch": 0.04, "learning_rate": 4.799999999999999e-08, "logits/chosen": -2.35503888130188, "logits/rejected": -2.3538386821746826, "logps/chosen": -5.4058074951171875, "logps/rejected": -6.091611862182617, "loss": 0.691, "rewards/accuracies": 1.0, "rewards/chosen": 0.009072876535356045, "rewards/margins": 0.0053757671266794205, "rewards/rejected": 0.0036971091758459806, "step": 24 }, { "epoch": 0.05, "learning_rate": 5e-08, "logits/chosen": -2.3333747386932373, "logits/rejected": -2.333815813064575, "logps/chosen": -17.2279109954834, "logps/rejected": -12.788660049438477, "loss": 0.7003, "rewards/accuracies": 0.0, "rewards/chosen": 0.013326073065400124, "rewards/margins": -0.007707405835390091, "rewards/rejected": 0.021033478900790215, "step": 25 }, { "epoch": 0.05, "learning_rate": 5.2e-08, "logits/chosen": -2.380843162536621, "logits/rejected": -2.3891408443450928, "logps/chosen": -10.382789611816406, "logps/rejected": -10.701053619384766, "loss": 0.6999, "rewards/accuracies": 0.0, "rewards/chosen": -3.0326844353112392e-05, "rewards/margins": -0.017486954107880592, "rewards/rejected": 0.017456626519560814, "step": 26 }, { "epoch": 0.05, "learning_rate": 5.4e-08, "logits/chosen": -2.4319238662719727, "logits/rejected": -2.4306769371032715, "logps/chosen": -16.024517059326172, "logps/rejected": -10.252748489379883, "loss": 0.6989, "rewards/accuracies": 0.0, "rewards/chosen": -0.0023277283180505037, "rewards/margins": -0.021920109167695045, "rewards/rejected": 0.01959238015115261, "step": 27 }, { "epoch": 0.05, "learning_rate": 5.6000000000000005e-08, "logits/chosen": -2.3821210861206055, "logits/rejected": -2.3166868686676025, "logps/chosen": -24.840362548828125, "logps/rejected": -12.768904685974121, "loss": 0.6966, "rewards/accuracies": 0.0, "rewards/chosen": -0.009904098697006702, "rewards/margins": -0.02526102215051651, "rewards/rejected": 0.015356922522187233, "step": 28 }, { "epoch": 0.05, "learning_rate": 5.7999999999999997e-08, "logits/chosen": -2.2613134384155273, "logits/rejected": -2.2917466163635254, "logps/chosen": -9.188074111938477, "logps/rejected": -21.88112449645996, "loss": 0.6902, "rewards/accuracies": 1.0, "rewards/chosen": 0.00732345599681139, "rewards/margins": 0.013580322265625, "rewards/rejected": -0.006256866734474897, "step": 29 }, { "epoch": 0.05, "learning_rate": 6e-08, "logits/chosen": -2.3161978721618652, "logits/rejected": -2.4611916542053223, "logps/chosen": -8.29227066040039, "logps/rejected": -30.51226806640625, "loss": 0.6902, "rewards/accuracies": 0.0, "rewards/chosen": 0.0021122933831065893, "rewards/margins": -0.0017615316901355982, "rewards/rejected": 0.0038738250732421875, "step": 30 }, { "epoch": 0.06, "learning_rate": 6.2e-08, "logits/chosen": -2.3254010677337646, "logits/rejected": -2.3160271644592285, "logps/chosen": -7.802990436553955, "logps/rejected": -13.133201599121094, "loss": 0.7043, "rewards/accuracies": 1.0, "rewards/chosen": 0.02068352699279785, "rewards/margins": 0.01055064145475626, "rewards/rejected": 0.010132885538041592, "step": 31 }, { "epoch": 0.06, "learning_rate": 6.4e-08, "logits/chosen": -2.399381637573242, "logits/rejected": -2.4597318172454834, "logps/chosen": -7.38586950302124, "logps/rejected": -23.632644653320312, "loss": 0.697, "rewards/accuracies": 0.0, "rewards/chosen": -0.00487627973780036, "rewards/margins": -0.021175337955355644, "rewards/rejected": 0.016299057751893997, "step": 32 }, { "epoch": 0.06, "learning_rate": 6.6e-08, "logits/chosen": -2.4966487884521484, "logits/rejected": -2.497262954711914, "logps/chosen": -6.381768226623535, "logps/rejected": -6.634945869445801, "loss": 0.6907, "rewards/accuracies": 0.0, "rewards/chosen": 0.009199476800858974, "rewards/margins": -0.0045680999755859375, "rewards/rejected": 0.013767576776444912, "step": 33 }, { "epoch": 0.06, "learning_rate": 6.8e-08, "logits/chosen": -2.397836685180664, "logits/rejected": -2.579631805419922, "logps/chosen": -10.25218391418457, "logps/rejected": -54.97833251953125, "loss": 0.6898, "rewards/accuracies": 1.0, "rewards/chosen": 0.021503640338778496, "rewards/margins": 0.02915821224451065, "rewards/rejected": -0.0076545714400708675, "step": 34 }, { "epoch": 0.06, "learning_rate": 6.999999999999999e-08, "logits/chosen": -2.246124267578125, "logits/rejected": -2.2530107498168945, "logps/chosen": -6.358066558837891, "logps/rejected": -8.714149475097656, "loss": 0.6988, "rewards/accuracies": 0.0, "rewards/chosen": 0.006029939744621515, "rewards/margins": -0.014686251059174538, "rewards/rejected": 0.020716190338134766, "step": 35 }, { "epoch": 0.07, "learning_rate": 7.2e-08, "logits/chosen": -2.3312201499938965, "logits/rejected": -2.3323092460632324, "logps/chosen": -12.635443687438965, "logps/rejected": -10.114472389221191, "loss": 0.6861, "rewards/accuracies": 0.0, "rewards/chosen": 0.0014052391052246094, "rewards/margins": -0.013797283172607422, "rewards/rejected": 0.015202522277832031, "step": 36 }, { "epoch": 0.07, "learning_rate": 7.4e-08, "logits/chosen": -2.447162389755249, "logits/rejected": -2.4495975971221924, "logps/chosen": -7.824006080627441, "logps/rejected": -7.425736427307129, "loss": 0.693, "rewards/accuracies": 0.0, "rewards/chosen": 0.010465860366821289, "rewards/margins": -0.00194630678743124, "rewards/rejected": 0.01241216715425253, "step": 37 }, { "epoch": 0.07, "learning_rate": 7.599999999999999e-08, "logits/chosen": -2.4798014163970947, "logits/rejected": -2.4473395347595215, "logps/chosen": -28.49984359741211, "logps/rejected": -8.31656551361084, "loss": 0.6916, "rewards/accuracies": 0.0, "rewards/chosen": -0.000903701817151159, "rewards/margins": -0.0034576416946947575, "rewards/rejected": 0.0025539398193359375, "step": 38 }, { "epoch": 0.07, "learning_rate": 7.8e-08, "logits/chosen": -2.4640979766845703, "logits/rejected": -2.461376428604126, "logps/chosen": -9.92983627319336, "logps/rejected": -6.600978374481201, "loss": 0.6841, "rewards/accuracies": 0.0, "rewards/chosen": 0.0025506974197924137, "rewards/margins": -0.009361982345581055, "rewards/rejected": 0.011912680231034756, "step": 39 }, { "epoch": 0.07, "learning_rate": 8e-08, "logits/chosen": -2.3568115234375, "logits/rejected": -2.481773853302002, "logps/chosen": -7.670772552490234, "logps/rejected": -36.98552703857422, "loss": 0.7044, "rewards/accuracies": 1.0, "rewards/chosen": 0.018939971923828125, "rewards/margins": 0.0004917141050100327, "rewards/rejected": 0.018448257818818092, "step": 40 }, { "epoch": 0.07, "learning_rate": 8.199999999999999e-08, "logits/chosen": -2.3002469539642334, "logits/rejected": -2.3081188201904297, "logps/chosen": -7.074734687805176, "logps/rejected": -10.827545166015625, "loss": 0.6937, "rewards/accuracies": 1.0, "rewards/chosen": 0.013248729519546032, "rewards/margins": 0.006235599052160978, "rewards/rejected": 0.007013130467385054, "step": 41 }, { "epoch": 0.08, "learning_rate": 8.4e-08, "logits/chosen": -2.27022385597229, "logits/rejected": -2.2793359756469727, "logps/chosen": -7.585087299346924, "logps/rejected": -13.808963775634766, "loss": 0.6874, "rewards/accuracies": 1.0, "rewards/chosen": 0.018505621701478958, "rewards/margins": 0.020139647647738457, "rewards/rejected": -0.0016340255970135331, "step": 42 }, { "epoch": 0.08, "learning_rate": 8.599999999999999e-08, "logits/chosen": -2.307023286819458, "logits/rejected": -2.313722610473633, "logps/chosen": -11.725784301757812, "logps/rejected": -14.10267162322998, "loss": 0.6894, "rewards/accuracies": 1.0, "rewards/chosen": 0.03845176845788956, "rewards/margins": 0.014880277216434479, "rewards/rejected": 0.023571491241455078, "step": 43 }, { "epoch": 0.08, "learning_rate": 8.8e-08, "logits/chosen": -2.3043580055236816, "logits/rejected": -2.313314199447632, "logps/chosen": -8.463017463684082, "logps/rejected": -9.628644943237305, "loss": 0.6908, "rewards/accuracies": 1.0, "rewards/chosen": 0.03678293153643608, "rewards/margins": 0.00982188992202282, "rewards/rejected": 0.02696104161441326, "step": 44 }, { "epoch": 0.08, "learning_rate": 9e-08, "logits/chosen": -2.3513708114624023, "logits/rejected": -2.441913604736328, "logps/chosen": -7.639476299285889, "logps/rejected": -35.66270446777344, "loss": 0.6912, "rewards/accuracies": 1.0, "rewards/chosen": 0.018041277304291725, "rewards/margins": 0.02560887485742569, "rewards/rejected": -0.00756759662181139, "step": 45 }, { "epoch": 0.08, "learning_rate": 9.2e-08, "logits/chosen": -2.513638973236084, "logits/rejected": -2.527595281600952, "logps/chosen": -17.517492294311523, "logps/rejected": -19.308147430419922, "loss": 0.6917, "rewards/accuracies": 1.0, "rewards/chosen": 0.03221931681036949, "rewards/margins": 0.011115647852420807, "rewards/rejected": 0.021103668957948685, "step": 46 }, { "epoch": 0.08, "learning_rate": 9.4e-08, "logits/chosen": -2.4606926441192627, "logits/rejected": -2.4727799892425537, "logps/chosen": -5.147980690002441, "logps/rejected": -17.574180603027344, "loss": 0.6974, "rewards/accuracies": 1.0, "rewards/chosen": 0.016130447387695312, "rewards/margins": 0.002154541201889515, "rewards/rejected": 0.013975906185805798, "step": 47 }, { "epoch": 0.09, "learning_rate": 9.599999999999999e-08, "logits/chosen": -2.5041418075561523, "logits/rejected": -2.538421154022217, "logps/chosen": -8.898770332336426, "logps/rejected": -14.988445281982422, "loss": 0.6852, "rewards/accuracies": 1.0, "rewards/chosen": 0.0396181121468544, "rewards/margins": 0.033495333045721054, "rewards/rejected": 0.006122780032455921, "step": 48 }, { "epoch": 0.09, "learning_rate": 9.799999999999999e-08, "logits/chosen": -2.4126386642456055, "logits/rejected": -2.4217910766601562, "logps/chosen": -7.232727527618408, "logps/rejected": -7.903555870056152, "loss": 0.6946, "rewards/accuracies": 0.0, "rewards/chosen": 0.014435911551117897, "rewards/margins": -0.017557861283421516, "rewards/rejected": 0.03199377283453941, "step": 49 }, { "epoch": 0.09, "learning_rate": 1e-07, "logits/chosen": -2.3014698028564453, "logits/rejected": -2.308087110519409, "logps/chosen": -10.799304962158203, "logps/rejected": -9.441315650939941, "loss": 0.6814, "rewards/accuracies": 1.0, "rewards/chosen": 0.040177155286073685, "rewards/margins": 0.0341496467590332, "rewards/rejected": 0.006027508061379194, "step": 50 }, { "epoch": 0.09, "learning_rate": 9.999990469240121e-08, "logits/chosen": -2.378448486328125, "logits/rejected": -2.3785440921783447, "logps/chosen": -10.508638381958008, "logps/rejected": -17.190567016601562, "loss": 0.7001, "rewards/accuracies": 1.0, "rewards/chosen": 0.05390014871954918, "rewards/margins": 0.0005460754036903381, "rewards/rejected": 0.05335407331585884, "step": 51 }, { "epoch": 0.09, "learning_rate": 9.999961876996821e-08, "logits/chosen": -2.420865058898926, "logits/rejected": -2.409257173538208, "logps/chosen": -12.432611465454102, "logps/rejected": -11.799383163452148, "loss": 0.6925, "rewards/accuracies": 0.0, "rewards/chosen": 0.0289185531437397, "rewards/margins": -0.03409251943230629, "rewards/rejected": 0.06301107257604599, "step": 52 }, { "epoch": 0.1, "learning_rate": 9.999914223379101e-08, "logits/chosen": -2.440502166748047, "logits/rejected": -2.4391658306121826, "logps/chosen": -5.867395401000977, "logps/rejected": -14.243603706359863, "loss": 0.6745, "rewards/accuracies": 1.0, "rewards/chosen": 0.03711548075079918, "rewards/margins": 0.01639547571539879, "rewards/rejected": 0.02072000503540039, "step": 53 }, { "epoch": 0.1, "learning_rate": 9.999847508568631e-08, "logits/chosen": -2.3263449668884277, "logits/rejected": -2.374784469604492, "logps/chosen": -6.9221673011779785, "logps/rejected": -24.391578674316406, "loss": 0.6925, "rewards/accuracies": 1.0, "rewards/chosen": 0.044374849647283554, "rewards/margins": 0.04406357184052467, "rewards/rejected": 0.0003112792910542339, "step": 54 }, { "epoch": 0.1, "learning_rate": 9.99976173281975e-08, "logits/chosen": -2.4658830165863037, "logits/rejected": -2.4153714179992676, "logps/chosen": -23.97774314880371, "logps/rejected": -7.61812162399292, "loss": 0.698, "rewards/accuracies": 0.0, "rewards/chosen": 0.039365004748106, "rewards/margins": -0.02614598348736763, "rewards/rejected": 0.06551098823547363, "step": 55 }, { "epoch": 0.1, "learning_rate": 9.999656896459458e-08, "logits/chosen": -2.40270733833313, "logits/rejected": -2.4032740592956543, "logps/chosen": -7.897662162780762, "logps/rejected": -7.496974468231201, "loss": 0.6918, "rewards/accuracies": 0.0, "rewards/chosen": 0.04084062576293945, "rewards/margins": -0.0017962455749511719, "rewards/rejected": 0.042636871337890625, "step": 56 }, { "epoch": 0.1, "learning_rate": 9.999532999887426e-08, "logits/chosen": -2.4633538722991943, "logits/rejected": -2.459826946258545, "logps/chosen": -7.278371334075928, "logps/rejected": -10.897589683532715, "loss": 0.6947, "rewards/accuracies": 0.0, "rewards/chosen": 0.05374107509851456, "rewards/margins": -0.006213758140802383, "rewards/rejected": 0.05995483323931694, "step": 57 }, { "epoch": 0.1, "learning_rate": 9.999390043575984e-08, "logits/chosen": -2.4049112796783447, "logits/rejected": -2.4014763832092285, "logps/chosen": -7.474339485168457, "logps/rejected": -14.555595397949219, "loss": 0.6974, "rewards/accuracies": 1.0, "rewards/chosen": 0.04447374492883682, "rewards/margins": 0.013419725000858307, "rewards/rejected": 0.031054019927978516, "step": 58 }, { "epoch": 0.11, "learning_rate": 9.999228028070124e-08, "logits/chosen": -2.451110601425171, "logits/rejected": -2.4575412273406982, "logps/chosen": -9.840293884277344, "logps/rejected": -8.262393951416016, "loss": 0.6942, "rewards/accuracies": 0.0, "rewards/chosen": 0.046073246747255325, "rewards/margins": -0.015468500554561615, "rewards/rejected": 0.06154174730181694, "step": 59 }, { "epoch": 0.11, "learning_rate": 9.9990469539875e-08, "logits/chosen": -2.3825371265411377, "logits/rejected": -2.390270709991455, "logps/chosen": -11.013154983520508, "logps/rejected": -30.90131378173828, "loss": 0.6643, "rewards/accuracies": 1.0, "rewards/chosen": 0.07539959251880646, "rewards/margins": 0.06395512074232101, "rewards/rejected": 0.011444473639130592, "step": 60 }, { "epoch": 0.11, "learning_rate": 9.99884682201842e-08, "logits/chosen": -2.3830745220184326, "logits/rejected": -2.4244637489318848, "logps/chosen": -8.95329475402832, "logps/rejected": -15.360189437866211, "loss": 0.6781, "rewards/accuracies": 1.0, "rewards/chosen": 0.08748874813318253, "rewards/margins": 0.07812328636646271, "rewards/rejected": 0.009365463629364967, "step": 61 }, { "epoch": 0.11, "learning_rate": 9.998627632925852e-08, "logits/chosen": -2.3678367137908936, "logits/rejected": -2.412022590637207, "logps/chosen": -16.58724021911621, "logps/rejected": -20.303213119506836, "loss": 0.7007, "rewards/accuracies": 1.0, "rewards/chosen": 0.044438935816287994, "rewards/margins": 0.025873566046357155, "rewards/rejected": 0.01856536976993084, "step": 62 }, { "epoch": 0.11, "learning_rate": 9.998389387545405e-08, "logits/chosen": -2.2430334091186523, "logits/rejected": -2.241560935974121, "logps/chosen": -5.474209785461426, "logps/rejected": -13.452353477478027, "loss": 0.7078, "rewards/accuracies": 1.0, "rewards/chosen": 0.05489244684576988, "rewards/margins": 0.015484429895877838, "rewards/rejected": 0.039408016949892044, "step": 63 }, { "epoch": 0.12, "learning_rate": 9.998132086785346e-08, "logits/chosen": -2.3586506843566895, "logits/rejected": -2.3619565963745117, "logps/chosen": -9.145086288452148, "logps/rejected": -11.197942733764648, "loss": 0.6966, "rewards/accuracies": 0.0, "rewards/chosen": 0.06754131615161896, "rewards/margins": -0.017583176493644714, "rewards/rejected": 0.08512449264526367, "step": 64 }, { "epoch": 0.12, "learning_rate": 9.997855731626586e-08, "logits/chosen": -2.416443347930908, "logits/rejected": -2.4229495525360107, "logps/chosen": -10.843280792236328, "logps/rejected": -10.842333793640137, "loss": 0.7068, "rewards/accuracies": 0.0, "rewards/chosen": 0.04299621656537056, "rewards/margins": -0.04639120027422905, "rewards/rejected": 0.08938741683959961, "step": 65 }, { "epoch": 0.12, "learning_rate": 9.997560323122672e-08, "logits/chosen": -2.40488862991333, "logits/rejected": -2.41951322555542, "logps/chosen": -9.371895790100098, "logps/rejected": -11.103281021118164, "loss": 0.6607, "rewards/accuracies": 1.0, "rewards/chosen": 0.1106499657034874, "rewards/margins": 0.08840150386095047, "rewards/rejected": 0.022248459979891777, "step": 66 }, { "epoch": 0.12, "learning_rate": 9.99724586239979e-08, "logits/chosen": -2.339548349380493, "logits/rejected": -2.342857599258423, "logps/chosen": -12.49120807647705, "logps/rejected": -8.855352401733398, "loss": 0.6625, "rewards/accuracies": 1.0, "rewards/chosen": 0.09497766941785812, "rewards/margins": 0.038879018276929855, "rewards/rejected": 0.05609865114092827, "step": 67 }, { "epoch": 0.12, "learning_rate": 9.996912350656761e-08, "logits/chosen": -2.4344944953918457, "logits/rejected": -2.4545233249664307, "logps/chosen": 0.0, "logps/rejected": -8.593511581420898, "loss": 0.7074, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": -0.08185730129480362, "rewards/rejected": 0.08185730129480362, "step": 68 }, { "epoch": 0.12, "learning_rate": 9.996559789165036e-08, "logits/chosen": -2.387502431869507, "logits/rejected": -2.4027185440063477, "logps/chosen": -7.360937595367432, "logps/rejected": -12.13730239868164, "loss": 0.6619, "rewards/accuracies": 1.0, "rewards/chosen": 0.16625647246837616, "rewards/margins": 0.0677366703748703, "rewards/rejected": 0.09851980209350586, "step": 69 }, { "epoch": 0.13, "learning_rate": 9.996188179268683e-08, "logits/chosen": -2.461111307144165, "logits/rejected": -2.462653160095215, "logps/chosen": -12.731412887573242, "logps/rejected": -8.274236679077148, "loss": 0.7005, "rewards/accuracies": 0.0, "rewards/chosen": 0.08631058037281036, "rewards/margins": -0.009499169886112213, "rewards/rejected": 0.09580975025892258, "step": 70 }, { "epoch": 0.13, "learning_rate": 9.995797522384393e-08, "logits/chosen": -2.3225626945495605, "logits/rejected": -2.3708510398864746, "logps/chosen": -8.587528228759766, "logps/rejected": -32.67006301879883, "loss": 0.6599, "rewards/accuracies": 1.0, "rewards/chosen": 0.10431327670812607, "rewards/margins": 0.06823882460594177, "rewards/rejected": 0.036074448376894, "step": 71 }, { "epoch": 0.13, "learning_rate": 9.995387820001468e-08, "logits/chosen": -2.3434059619903564, "logits/rejected": -2.4246110916137695, "logps/chosen": -7.356583595275879, "logps/rejected": -34.26518630981445, "loss": 0.6803, "rewards/accuracies": 1.0, "rewards/chosen": 0.08577127754688263, "rewards/margins": 0.09063845127820969, "rewards/rejected": -0.004867172334343195, "step": 72 }, { "epoch": 0.13, "learning_rate": 9.99495907368182e-08, "logits/chosen": -2.313678503036499, "logits/rejected": -2.3033361434936523, "logps/chosen": -10.49159049987793, "logps/rejected": -7.6001996994018555, "loss": 0.6671, "rewards/accuracies": 1.0, "rewards/chosen": 0.09617462009191513, "rewards/margins": 0.02295522391796112, "rewards/rejected": 0.07321939617395401, "step": 73 }, { "epoch": 0.13, "learning_rate": 9.994511285059959e-08, "logits/chosen": -2.3416731357574463, "logits/rejected": -2.333037853240967, "logps/chosen": -10.209074020385742, "logps/rejected": -5.5243024826049805, "loss": 0.6988, "rewards/accuracies": 0.0, "rewards/chosen": 0.10655460506677628, "rewards/margins": -0.01983899623155594, "rewards/rejected": 0.12639360129833221, "step": 74 }, { "epoch": 0.14, "learning_rate": 9.994044455842991e-08, "logits/chosen": -2.40622615814209, "logits/rejected": -2.4040911197662354, "logps/chosen": -5.807675838470459, "logps/rejected": -11.431625366210938, "loss": 0.6734, "rewards/accuracies": 1.0, "rewards/chosen": 0.130354642868042, "rewards/margins": 0.06381497532129288, "rewards/rejected": 0.06653966754674911, "step": 75 }, { "epoch": 0.14, "learning_rate": 9.99355858781061e-08, "logits/chosen": -2.3024539947509766, "logits/rejected": -2.2933831214904785, "logps/chosen": -8.600854873657227, "logps/rejected": -14.598154067993164, "loss": 0.6715, "rewards/accuracies": 0.0, "rewards/chosen": 0.11023731529712677, "rewards/margins": -0.007185362279415131, "rewards/rejected": 0.1174226775765419, "step": 76 }, { "epoch": 0.14, "learning_rate": 9.993053682815098e-08, "logits/chosen": -2.4337263107299805, "logits/rejected": -2.4529881477355957, "logps/chosen": 0.0, "logps/rejected": -6.768820762634277, "loss": 0.7306, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": -0.07793869823217392, "rewards/rejected": 0.07793869823217392, "step": 77 }, { "epoch": 0.14, "learning_rate": 9.992529742781299e-08, "logits/chosen": -2.380641460418701, "logits/rejected": -2.3586907386779785, "logps/chosen": -4.901264190673828, "logps/rejected": 0.0, "loss": 0.6629, "rewards/accuracies": 1.0, "rewards/chosen": 0.12497282028198242, "rewards/margins": 0.12497282028198242, "rewards/rejected": 0.0, "step": 78 }, { "epoch": 0.14, "learning_rate": 9.991986769706635e-08, "logits/chosen": -2.4663071632385254, "logits/rejected": -2.471768856048584, "logps/chosen": -8.278343200683594, "logps/rejected": -11.416742324829102, "loss": 0.7023, "rewards/accuracies": 0.0, "rewards/chosen": 0.07833452522754669, "rewards/margins": -0.03630027920007706, "rewards/rejected": 0.11463480442762375, "step": 79 }, { "epoch": 0.14, "learning_rate": 9.991424765661085e-08, "logits/chosen": -2.3394222259521484, "logits/rejected": -2.335836172103882, "logps/chosen": -7.571021556854248, "logps/rejected": -11.418249130249023, "loss": 0.6586, "rewards/accuracies": 1.0, "rewards/chosen": 0.1805979311466217, "rewards/margins": 0.14324623346328735, "rewards/rejected": 0.03735170513391495, "step": 80 }, { "epoch": 0.15, "learning_rate": 9.99084373278718e-08, "logits/chosen": -2.3478682041168213, "logits/rejected": -2.418768882751465, "logps/chosen": -6.671209812164307, "logps/rejected": -31.826217651367188, "loss": 0.6669, "rewards/accuracies": 1.0, "rewards/chosen": 0.10175319015979767, "rewards/margins": 0.0620887316763401, "rewards/rejected": 0.039664458483457565, "step": 81 }, { "epoch": 0.15, "learning_rate": 9.990243673299989e-08, "logits/chosen": -2.284860849380493, "logits/rejected": -2.2787506580352783, "logps/chosen": -6.530755996704102, "logps/rejected": -5.838879585266113, "loss": 0.6607, "rewards/accuracies": 1.0, "rewards/chosen": 0.12295951694250107, "rewards/margins": 0.05191954970359802, "rewards/rejected": 0.07103996723890305, "step": 82 }, { "epoch": 0.15, "learning_rate": 9.989624589487128e-08, "logits/chosen": -2.290731191635132, "logits/rejected": -2.289825916290283, "logps/chosen": -9.052713394165039, "logps/rejected": -18.710521697998047, "loss": 0.6688, "rewards/accuracies": 1.0, "rewards/chosen": 0.1636674851179123, "rewards/margins": 0.024275586009025574, "rewards/rejected": 0.13939189910888672, "step": 83 }, { "epoch": 0.15, "learning_rate": 9.988986483708728e-08, "logits/chosen": -2.488445997238159, "logits/rejected": -2.4864981174468994, "logps/chosen": -20.736417770385742, "logps/rejected": -13.651885032653809, "loss": 0.6386, "rewards/accuracies": 1.0, "rewards/chosen": 0.14595337212085724, "rewards/margins": 0.10019759833812714, "rewards/rejected": 0.045755770057439804, "step": 84 }, { "epoch": 0.15, "learning_rate": 9.988329358397442e-08, "logits/chosen": -2.3921732902526855, "logits/rejected": -2.3982977867126465, "logps/chosen": -8.54448127746582, "logps/rejected": -6.6750946044921875, "loss": 0.7213, "rewards/accuracies": 0.0, "rewards/chosen": 0.10241327434778214, "rewards/margins": -0.04301748424768448, "rewards/rejected": 0.1454307585954666, "step": 85 }, { "epoch": 0.16, "learning_rate": 9.987653216058434e-08, "logits/chosen": -2.3995776176452637, "logits/rejected": -2.581691026687622, "logps/chosen": -7.096373558044434, "logps/rejected": -42.93755340576172, "loss": 0.7193, "rewards/accuracies": 1.0, "rewards/chosen": 0.10297536849975586, "rewards/margins": 0.07506819069385529, "rewards/rejected": 0.027907181531190872, "step": 86 }, { "epoch": 0.16, "learning_rate": 9.986958059269364e-08, "logits/chosen": -2.277228832244873, "logits/rejected": -2.3073112964630127, "logps/chosen": 0.0, "logps/rejected": -5.434016227722168, "loss": 0.6901, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": -0.13159875571727753, "rewards/rejected": 0.13159875571727753, "step": 87 }, { "epoch": 0.16, "learning_rate": 9.986243890680379e-08, "logits/chosen": -2.51358962059021, "logits/rejected": -2.510737895965576, "logps/chosen": -8.954605102539062, "logps/rejected": -10.312479019165039, "loss": 0.6581, "rewards/accuracies": 1.0, "rewards/chosen": 0.16680908203125, "rewards/margins": 0.07205238193273544, "rewards/rejected": 0.09475670009851456, "step": 88 }, { "epoch": 0.16, "learning_rate": 9.985510713014107e-08, "logits/chosen": -2.3313119411468506, "logits/rejected": -2.3334102630615234, "logps/chosen": -5.546603679656982, "logps/rejected": -10.115448951721191, "loss": 0.6611, "rewards/accuracies": 1.0, "rewards/chosen": 0.14559884369373322, "rewards/margins": 0.030974343419075012, "rewards/rejected": 0.1146245002746582, "step": 89 }, { "epoch": 0.16, "learning_rate": 9.984758529065647e-08, "logits/chosen": -2.5282058715820312, "logits/rejected": -2.5297224521636963, "logps/chosen": -9.694153785705566, "logps/rejected": -9.248512268066406, "loss": 0.7101, "rewards/accuracies": 0.0, "rewards/chosen": 0.13918304443359375, "rewards/margins": -0.06656418740749359, "rewards/rejected": 0.20574723184108734, "step": 90 }, { "epoch": 0.16, "learning_rate": 9.983987341702549e-08, "logits/chosen": -2.294678211212158, "logits/rejected": -2.294210433959961, "logps/chosen": -7.831618309020996, "logps/rejected": -4.948800563812256, "loss": 0.7064, "rewards/accuracies": 1.0, "rewards/chosen": 0.13476915657520294, "rewards/margins": 0.0037799328565597534, "rewards/rejected": 0.1309892237186432, "step": 91 }, { "epoch": 0.17, "learning_rate": 9.983197153864815e-08, "logits/chosen": -2.3263628482818604, "logits/rejected": -2.331557035446167, "logps/chosen": -7.520052909851074, "logps/rejected": -6.226783275604248, "loss": 0.6874, "rewards/accuracies": 0.0, "rewards/chosen": 0.1455082893371582, "rewards/margins": -0.05565991997718811, "rewards/rejected": 0.2011682093143463, "step": 92 }, { "epoch": 0.17, "learning_rate": 9.982387968564882e-08, "logits/chosen": -2.459775447845459, "logits/rejected": -2.46536922454834, "logps/chosen": -11.53409194946289, "logps/rejected": -8.27365779876709, "loss": 0.7343, "rewards/accuracies": 0.0, "rewards/chosen": 0.19004860520362854, "rewards/margins": -0.020929142832756042, "rewards/rejected": 0.21097774803638458, "step": 93 }, { "epoch": 0.17, "learning_rate": 9.981559788887612e-08, "logits/chosen": -2.3325905799865723, "logits/rejected": -2.3306403160095215, "logps/chosen": -3.8971426486968994, "logps/rejected": -13.121517181396484, "loss": 0.6502, "rewards/accuracies": 1.0, "rewards/chosen": 0.20370347797870636, "rewards/margins": 0.07232244312763214, "rewards/rejected": 0.13138103485107422, "step": 94 }, { "epoch": 0.17, "learning_rate": 9.980712617990273e-08, "logits/chosen": -2.4118523597717285, "logits/rejected": -2.405606269836426, "logps/chosen": -4.918684482574463, "logps/rejected": -10.232442855834961, "loss": 0.6492, "rewards/accuracies": 1.0, "rewards/chosen": 0.15142269432544708, "rewards/margins": 0.036391258239746094, "rewards/rejected": 0.11503143608570099, "step": 95 }, { "epoch": 0.17, "learning_rate": 9.979846459102541e-08, "logits/chosen": -2.509748697280884, "logits/rejected": -2.5142505168914795, "logps/chosen": -8.220730781555176, "logps/rejected": -5.403349876403809, "loss": 0.8004, "rewards/accuracies": 0.0, "rewards/chosen": 0.15556879341602325, "rewards/margins": -0.06819792091846466, "rewards/rejected": 0.22376671433448792, "step": 96 }, { "epoch": 0.18, "learning_rate": 9.978961315526475e-08, "logits/chosen": -2.4071719646453857, "logits/rejected": -2.406477928161621, "logps/chosen": -13.656316757202148, "logps/rejected": -8.742029190063477, "loss": 0.6717, "rewards/accuracies": 0.0, "rewards/chosen": 0.14422693848609924, "rewards/margins": -0.05706796050071716, "rewards/rejected": 0.2012948989868164, "step": 97 }, { "epoch": 0.18, "learning_rate": 9.978057190636515e-08, "logits/chosen": -2.4369442462921143, "logits/rejected": -2.4459564685821533, "logps/chosen": -6.69063663482666, "logps/rejected": -4.0359392166137695, "loss": 0.7014, "rewards/accuracies": 0.0, "rewards/chosen": 0.1720501035451889, "rewards/margins": -0.0326848030090332, "rewards/rejected": 0.2047349065542221, "step": 98 }, { "epoch": 0.18, "learning_rate": 9.977134087879457e-08, "logits/chosen": -2.502683401107788, "logits/rejected": -2.5021512508392334, "logps/chosen": -4.611237049102783, "logps/rejected": -8.045561790466309, "loss": 0.6231, "rewards/accuracies": 1.0, "rewards/chosen": 0.3570040166378021, "rewards/margins": 0.15090756118297577, "rewards/rejected": 0.20609645545482635, "step": 99 }, { "epoch": 0.18, "learning_rate": 9.976192010774449e-08, "logits/chosen": -2.269315719604492, "logits/rejected": -2.293654441833496, "logps/chosen": -7.139829635620117, "logps/rejected": -13.080076217651367, "loss": 0.6281, "rewards/accuracies": 1.0, "rewards/chosen": 0.22442837059497833, "rewards/margins": 0.14268293976783752, "rewards/rejected": 0.0817454382777214, "step": 100 }, { "epoch": 0.18, "learning_rate": 9.975230962912977e-08, "logits/chosen": -2.3979849815368652, "logits/rejected": -2.3985321521759033, "logps/chosen": -9.383016586303711, "logps/rejected": -16.9833927154541, "loss": 0.6377, "rewards/accuracies": 1.0, "rewards/chosen": 0.2547125816345215, "rewards/margins": 0.12273892760276794, "rewards/rejected": 0.13197365403175354, "step": 101 }, { "epoch": 0.18, "learning_rate": 9.974250947958847e-08, "logits/chosen": -2.285163640975952, "logits/rejected": -2.271881103515625, "logps/chosen": -14.16197395324707, "logps/rejected": 0.0, "loss": 0.653, "rewards/accuracies": 1.0, "rewards/chosen": 0.1407550871372223, "rewards/margins": 0.1407550871372223, "rewards/rejected": 0.0, "step": 102 }, { "epoch": 0.19, "learning_rate": 9.973251969648172e-08, "logits/chosen": -2.2896625995635986, "logits/rejected": -2.287752389907837, "logps/chosen": -3.255819797515869, "logps/rejected": -6.427026748657227, "loss": 0.7224, "rewards/accuracies": 1.0, "rewards/chosen": 0.30816492438316345, "rewards/margins": 0.09990733861923218, "rewards/rejected": 0.20825758576393127, "step": 103 }, { "epoch": 0.19, "learning_rate": 9.972234031789364e-08, "logits/chosen": -2.3225739002227783, "logits/rejected": -2.3862287998199463, "logps/chosen": -6.25725793838501, "logps/rejected": -32.300010681152344, "loss": 0.6303, "rewards/accuracies": 1.0, "rewards/chosen": 0.21919532120227814, "rewards/margins": 0.2343740016222, "rewards/rejected": -0.015178680419921875, "step": 104 }, { "epoch": 0.19, "learning_rate": 9.97119713826311e-08, "logits/chosen": -2.439568519592285, "logits/rejected": -2.4399983882904053, "logps/chosen": -14.443065643310547, "logps/rejected": -4.665273666381836, "loss": 0.6993, "rewards/accuracies": 0.0, "rewards/chosen": 0.06276092678308487, "rewards/margins": -0.08192668110132217, "rewards/rejected": 0.14468760788440704, "step": 105 }, { "epoch": 0.19, "learning_rate": 9.970141293022363e-08, "logits/chosen": -2.421908140182495, "logits/rejected": -2.403454065322876, "logps/chosen": -11.561504364013672, "logps/rejected": -4.8331756591796875, "loss": 0.763, "rewards/accuracies": 0.0, "rewards/chosen": 0.12199535220861435, "rewards/margins": -0.23955965042114258, "rewards/rejected": 0.3615550100803375, "step": 106 }, { "epoch": 0.19, "learning_rate": 9.969066500092327e-08, "logits/chosen": -2.3292994499206543, "logits/rejected": -2.316873550415039, "logps/chosen": -8.481697082519531, "logps/rejected": -10.962151527404785, "loss": 0.6298, "rewards/accuracies": 1.0, "rewards/chosen": 0.1652754843235016, "rewards/margins": 0.09654407948255539, "rewards/rejected": 0.0687314048409462, "step": 107 }, { "epoch": 0.2, "learning_rate": 9.967972763570439e-08, "logits/chosen": -2.489640474319458, "logits/rejected": -2.520338535308838, "logps/chosen": -6.0577192306518555, "logps/rejected": -27.11642074584961, "loss": 0.6414, "rewards/accuracies": 1.0, "rewards/chosen": 0.23189668357372284, "rewards/margins": 0.1571703851222992, "rewards/rejected": 0.07472629845142365, "step": 108 }, { "epoch": 0.2, "learning_rate": 9.966860087626353e-08, "logits/chosen": -2.4388346672058105, "logits/rejected": -2.4538557529449463, "logps/chosen": -14.384653091430664, "logps/rejected": -12.384428977966309, "loss": 0.6789, "rewards/accuracies": 1.0, "rewards/chosen": 0.0928766280412674, "rewards/margins": 0.05514831840991974, "rewards/rejected": 0.037728309631347656, "step": 109 }, { "epoch": 0.2, "learning_rate": 9.965728476501931e-08, "logits/chosen": -2.419527530670166, "logits/rejected": -2.425410509109497, "logps/chosen": -8.211158752441406, "logps/rejected": -10.605290412902832, "loss": 0.7016, "rewards/accuracies": 1.0, "rewards/chosen": 0.25641193985939026, "rewards/margins": 0.13739633560180664, "rewards/rejected": 0.11901559680700302, "step": 110 }, { "epoch": 0.2, "learning_rate": 9.964577934511216e-08, "logits/chosen": -2.396045446395874, "logits/rejected": -2.3912932872772217, "logps/chosen": -5.418757915496826, "logps/rejected": -5.282065391540527, "loss": 0.6957, "rewards/accuracies": 1.0, "rewards/chosen": 0.33999982476234436, "rewards/margins": 0.12328396737575531, "rewards/rejected": 0.21671585738658905, "step": 111 }, { "epoch": 0.2, "learning_rate": 9.963408466040426e-08, "logits/chosen": -2.329381227493286, "logits/rejected": -2.3247385025024414, "logps/chosen": -19.883560180664062, "logps/rejected": -19.946130752563477, "loss": 0.5848, "rewards/accuracies": 1.0, "rewards/chosen": 0.25282764434814453, "rewards/margins": 0.14282016456127167, "rewards/rejected": 0.11000747978687286, "step": 112 }, { "epoch": 0.2, "learning_rate": 9.96222007554793e-08, "logits/chosen": -2.33381724357605, "logits/rejected": -2.3365793228149414, "logps/chosen": -10.375162124633789, "logps/rejected": -6.436722755432129, "loss": 0.723, "rewards/accuracies": 0.0, "rewards/chosen": 0.22413502633571625, "rewards/margins": -0.023867785930633545, "rewards/rejected": 0.2480028122663498, "step": 113 }, { "epoch": 0.21, "learning_rate": 9.961012767564232e-08, "logits/chosen": -2.3599414825439453, "logits/rejected": -2.3608312606811523, "logps/chosen": -5.301765441894531, "logps/rejected": -6.088128566741943, "loss": 0.617, "rewards/accuracies": 1.0, "rewards/chosen": 0.3840980529785156, "rewards/margins": 0.14477162063121796, "rewards/rejected": 0.23932643234729767, "step": 114 }, { "epoch": 0.21, "learning_rate": 9.95978654669196e-08, "logits/chosen": -2.330544948577881, "logits/rejected": -2.330049753189087, "logps/chosen": -6.536691188812256, "logps/rejected": -5.585371017456055, "loss": 0.7081, "rewards/accuracies": 0.0, "rewards/chosen": 0.2249184101819992, "rewards/margins": -0.028620287775993347, "rewards/rejected": 0.25353869795799255, "step": 115 }, { "epoch": 0.21, "learning_rate": 9.958541417605837e-08, "logits/chosen": -2.24149489402771, "logits/rejected": -2.2781310081481934, "logps/chosen": -3.715651035308838, "logps/rejected": -12.874558448791504, "loss": 0.6961, "rewards/accuracies": 1.0, "rewards/chosen": 0.18673454225063324, "rewards/margins": 0.03220850229263306, "rewards/rejected": 0.15452603995800018, "step": 116 }, { "epoch": 0.21, "learning_rate": 9.957277385052674e-08, "logits/chosen": -2.3794939517974854, "logits/rejected": -2.3460965156555176, "logps/chosen": -18.699050903320312, "logps/rejected": -5.78874397277832, "loss": 0.72, "rewards/accuracies": 0.0, "rewards/chosen": 0.26297950744628906, "rewards/margins": -0.040816694498062134, "rewards/rejected": 0.3037962019443512, "step": 117 }, { "epoch": 0.21, "learning_rate": 9.955994453851351e-08, "logits/chosen": -2.331831693649292, "logits/rejected": -2.336618661880493, "logps/chosen": -5.081736087799072, "logps/rejected": -4.265026092529297, "loss": 0.714, "rewards/accuracies": 0.0, "rewards/chosen": 0.1828291416168213, "rewards/margins": -0.15689188241958618, "rewards/rejected": 0.33972102403640747, "step": 118 }, { "epoch": 0.22, "learning_rate": 9.954692628892788e-08, "logits/chosen": -2.350738525390625, "logits/rejected": -2.3543646335601807, "logps/chosen": -5.837278366088867, "logps/rejected": -4.724364757537842, "loss": 0.6769, "rewards/accuracies": 0.0, "rewards/chosen": 0.23511438071727753, "rewards/margins": -0.11587081849575043, "rewards/rejected": 0.35098519921302795, "step": 119 }, { "epoch": 0.22, "learning_rate": 9.953371915139939e-08, "logits/chosen": -2.319340467453003, "logits/rejected": -2.322542428970337, "logps/chosen": -4.997714996337891, "logps/rejected": -7.767843246459961, "loss": 0.6475, "rewards/accuracies": 0.0, "rewards/chosen": 0.2106708586215973, "rewards/margins": -0.030094623565673828, "rewards/rejected": 0.24076548218727112, "step": 120 }, { "epoch": 0.22, "learning_rate": 9.952032317627766e-08, "logits/chosen": -2.3912508487701416, "logits/rejected": -2.3950178623199463, "logps/chosen": -6.925963878631592, "logps/rejected": -7.390510559082031, "loss": 0.7695, "rewards/accuracies": 0.0, "rewards/chosen": 0.25389763712882996, "rewards/margins": -0.01720184087753296, "rewards/rejected": 0.2710994780063629, "step": 121 }, { "epoch": 0.22, "learning_rate": 9.950673841463222e-08, "logits/chosen": -2.3534352779388428, "logits/rejected": -2.3598926067352295, "logps/chosen": -5.725888252258301, "logps/rejected": -4.058931827545166, "loss": 0.7166, "rewards/accuracies": 0.0, "rewards/chosen": 0.2865320146083832, "rewards/margins": -0.0577525794506073, "rewards/rejected": 0.3442845940589905, "step": 122 }, { "epoch": 0.22, "learning_rate": 9.94929649182523e-08, "logits/chosen": -2.3998987674713135, "logits/rejected": -2.4031379222869873, "logps/chosen": -6.551063537597656, "logps/rejected": -14.30239200592041, "loss": 0.6101, "rewards/accuracies": 1.0, "rewards/chosen": 0.37187957763671875, "rewards/margins": 0.1425579935312271, "rewards/rejected": 0.22932158410549164, "step": 123 }, { "epoch": 0.22, "learning_rate": 9.947900273964666e-08, "logits/chosen": -2.3326501846313477, "logits/rejected": -2.330869197845459, "logps/chosen": -13.666135787963867, "logps/rejected": -6.245104789733887, "loss": 0.6636, "rewards/accuracies": 0.0, "rewards/chosen": 0.2623256742954254, "rewards/margins": -0.010302841663360596, "rewards/rejected": 0.272628515958786, "step": 124 }, { "epoch": 0.23, "learning_rate": 9.94648519320434e-08, "logits/chosen": -2.6172287464141846, "logits/rejected": -2.6193552017211914, "logps/chosen": -7.048271656036377, "logps/rejected": -7.433640480041504, "loss": 0.7079, "rewards/accuracies": 0.0, "rewards/chosen": 0.23359857499599457, "rewards/margins": -0.05802436172962189, "rewards/rejected": 0.29162293672561646, "step": 125 }, { "epoch": 0.23, "learning_rate": 9.945051254938964e-08, "logits/chosen": -2.3578712940216064, "logits/rejected": -2.3552732467651367, "logps/chosen": -7.233275413513184, "logps/rejected": -8.791939735412598, "loss": 0.6092, "rewards/accuracies": 1.0, "rewards/chosen": 0.3240344226360321, "rewards/margins": 0.2336740642786026, "rewards/rejected": 0.0903603583574295, "step": 126 }, { "epoch": 0.23, "learning_rate": 9.94359846463515e-08, "logits/chosen": -2.3602776527404785, "logits/rejected": -2.451029062271118, "logps/chosen": -5.728513717651367, "logps/rejected": -32.14975357055664, "loss": 0.6505, "rewards/accuracies": 1.0, "rewards/chosen": 0.3024002015590668, "rewards/margins": 0.18520545959472656, "rewards/rejected": 0.1171947494149208, "step": 127 }, { "epoch": 0.23, "learning_rate": 9.942126827831379e-08, "logits/chosen": -2.1633834838867188, "logits/rejected": -2.1542301177978516, "logps/chosen": -5.207401275634766, "logps/rejected": -11.138128280639648, "loss": 0.7297, "rewards/accuracies": 1.0, "rewards/chosen": 0.2542322278022766, "rewards/margins": 0.0033097267150878906, "rewards/rejected": 0.2509225010871887, "step": 128 }, { "epoch": 0.23, "learning_rate": 9.940636350137972e-08, "logits/chosen": -2.3824093341827393, "logits/rejected": -2.398214340209961, "logps/chosen": -22.54759407043457, "logps/rejected": -28.212257385253906, "loss": 0.5766, "rewards/accuracies": 1.0, "rewards/chosen": 0.34521064162254333, "rewards/margins": 0.34302863478660583, "rewards/rejected": 0.0021820068359375, "step": 129 }, { "epoch": 0.24, "learning_rate": 9.939127037237086e-08, "logits/chosen": -2.3003432750701904, "logits/rejected": -2.2825422286987305, "logps/chosen": -19.465370178222656, "logps/rejected": -9.093670845031738, "loss": 0.8059, "rewards/accuracies": 0.0, "rewards/chosen": -0.03263397142291069, "rewards/margins": -0.3532991409301758, "rewards/rejected": 0.320665180683136, "step": 130 }, { "epoch": 0.24, "learning_rate": 9.937598894882681e-08, "logits/chosen": -2.394366502761841, "logits/rejected": -2.392514944076538, "logps/chosen": -4.998425483703613, "logps/rejected": -9.706464767456055, "loss": 0.778, "rewards/accuracies": 1.0, "rewards/chosen": 0.3346817195415497, "rewards/margins": 0.0013841688632965088, "rewards/rejected": 0.3332975506782532, "step": 131 }, { "epoch": 0.24, "learning_rate": 9.9360519289005e-08, "logits/chosen": -2.2909562587738037, "logits/rejected": -2.287640333175659, "logps/chosen": -11.652660369873047, "logps/rejected": -6.481339454650879, "loss": 0.7336, "rewards/accuracies": 0.0, "rewards/chosen": 0.2999385893344879, "rewards/margins": -0.023863136768341064, "rewards/rejected": 0.323801726102829, "step": 132 }, { "epoch": 0.24, "learning_rate": 9.934486145188046e-08, "logits/chosen": -2.407975196838379, "logits/rejected": -2.414870262145996, "logps/chosen": -5.586932182312012, "logps/rejected": -5.662684440612793, "loss": 0.627, "rewards/accuracies": 1.0, "rewards/chosen": 0.38898277282714844, "rewards/margins": 0.06760996580123901, "rewards/rejected": 0.3213728070259094, "step": 133 }, { "epoch": 0.24, "learning_rate": 9.932901549714562e-08, "logits/chosen": -2.4959089756011963, "logits/rejected": -2.4914638996124268, "logps/chosen": -5.2286529541015625, "logps/rejected": -10.773895263671875, "loss": 0.6704, "rewards/accuracies": 1.0, "rewards/chosen": 0.4140705168247223, "rewards/margins": 0.2159944623708725, "rewards/rejected": 0.1980760544538498, "step": 134 }, { "epoch": 0.24, "learning_rate": 9.93129814852101e-08, "logits/chosen": -2.4884254932403564, "logits/rejected": -2.4834866523742676, "logps/chosen": -9.536659240722656, "logps/rejected": -4.014127254486084, "loss": 0.832, "rewards/accuracies": 0.0, "rewards/chosen": 0.2828584611415863, "rewards/margins": -0.07511812448501587, "rewards/rejected": 0.3579765856266022, "step": 135 }, { "epoch": 0.25, "learning_rate": 9.929675947720042e-08, "logits/chosen": -2.4372262954711914, "logits/rejected": -2.4439456462860107, "logps/chosen": -5.524782657623291, "logps/rejected": -5.917871475219727, "loss": 0.6775, "rewards/accuracies": 1.0, "rewards/chosen": 0.35260701179504395, "rewards/margins": 0.07340911030769348, "rewards/rejected": 0.27919790148735046, "step": 136 }, { "epoch": 0.25, "learning_rate": 9.928034953495981e-08, "logits/chosen": -2.3696579933166504, "logits/rejected": -2.3431239128112793, "logps/chosen": -5.175504684448242, "logps/rejected": 0.0, "loss": 0.5685, "rewards/accuracies": 1.0, "rewards/chosen": 0.3216308653354645, "rewards/margins": 0.3216308653354645, "rewards/rejected": 0.0, "step": 137 }, { "epoch": 0.25, "learning_rate": 9.926375172104793e-08, "logits/chosen": -2.2885961532592773, "logits/rejected": -2.3341920375823975, "logps/chosen": -3.185248851776123, "logps/rejected": -41.28441619873047, "loss": 0.6507, "rewards/accuracies": 1.0, "rewards/chosen": 0.28955456614494324, "rewards/margins": 0.15927311778068542, "rewards/rejected": 0.1302814483642578, "step": 138 }, { "epoch": 0.25, "learning_rate": 9.924696609874072e-08, "logits/chosen": -2.340585708618164, "logits/rejected": -2.347442626953125, "logps/chosen": -9.26390266418457, "logps/rejected": -8.465582847595215, "loss": 0.7875, "rewards/accuracies": 0.0, "rewards/chosen": 0.16837158799171448, "rewards/margins": -0.28973475098609924, "rewards/rejected": 0.4581063389778137, "step": 139 }, { "epoch": 0.25, "learning_rate": 9.922999273203007e-08, "logits/chosen": -2.3730244636535645, "logits/rejected": -2.3737990856170654, "logps/chosen": -3.6206812858581543, "logps/rejected": -5.4520463943481445, "loss": 0.7288, "rewards/accuracies": 0.0, "rewards/chosen": 0.33236032724380493, "rewards/margins": -0.0668390691280365, "rewards/rejected": 0.39919939637184143, "step": 140 }, { "epoch": 0.25, "learning_rate": 9.92128316856236e-08, "logits/chosen": -2.4046804904937744, "logits/rejected": -2.409132957458496, "logps/chosen": -3.2193961143493652, "logps/rejected": -3.244476556777954, "loss": 0.7098, "rewards/accuracies": 0.0, "rewards/chosen": 0.23537135124206543, "rewards/margins": -0.21182924509048462, "rewards/rejected": 0.44720059633255005, "step": 141 }, { "epoch": 0.26, "learning_rate": 9.919548302494445e-08, "logits/chosen": -2.3331186771392822, "logits/rejected": -2.332240581512451, "logps/chosen": -4.348869323730469, "logps/rejected": -5.16462516784668, "loss": 0.6887, "rewards/accuracies": 0.0, "rewards/chosen": 0.2696552276611328, "rewards/margins": -0.10769042372703552, "rewards/rejected": 0.37734565138816833, "step": 142 }, { "epoch": 0.26, "learning_rate": 9.917794681613098e-08, "logits/chosen": -2.3846280574798584, "logits/rejected": -2.5247721672058105, "logps/chosen": -4.418698787689209, "logps/rejected": -43.710166931152344, "loss": 0.616, "rewards/accuracies": 1.0, "rewards/chosen": 0.35047584772109985, "rewards/margins": 0.30932822823524475, "rewards/rejected": 0.041147615760564804, "step": 143 }, { "epoch": 0.26, "learning_rate": 9.916022312603654e-08, "logits/chosen": -2.2873055934906006, "logits/rejected": -2.2775731086730957, "logps/chosen": -7.456738471984863, "logps/rejected": -3.014536142349243, "loss": 0.6154, "rewards/accuracies": 0.0, "rewards/chosen": 0.3298681378364563, "rewards/margins": -0.029241234064102173, "rewards/rejected": 0.35910937190055847, "step": 144 }, { "epoch": 0.26, "learning_rate": 9.914231202222925e-08, "logits/chosen": -2.3314335346221924, "logits/rejected": -2.3960654735565186, "logps/chosen": -3.4458417892456055, "logps/rejected": -30.178592681884766, "loss": 0.6695, "rewards/accuracies": 1.0, "rewards/chosen": 0.3846662938594818, "rewards/margins": 0.3484310507774353, "rewards/rejected": 0.03623523935675621, "step": 145 }, { "epoch": 0.26, "learning_rate": 9.912421357299165e-08, "logits/chosen": -2.4531500339508057, "logits/rejected": -2.4538772106170654, "logps/chosen": -7.499881267547607, "logps/rejected": -11.05164909362793, "loss": 0.6626, "rewards/accuracies": 0.0, "rewards/chosen": 0.35543209314346313, "rewards/margins": -0.03822684288024902, "rewards/rejected": 0.39365893602371216, "step": 146 }, { "epoch": 0.27, "learning_rate": 9.910592784732054e-08, "logits/chosen": -2.3901374340057373, "logits/rejected": -2.3911118507385254, "logps/chosen": -6.918378829956055, "logps/rejected": -5.708879470825195, "loss": 0.7345, "rewards/accuracies": 0.0, "rewards/chosen": 0.2744766175746918, "rewards/margins": -0.038902491331100464, "rewards/rejected": 0.31337910890579224, "step": 147 }, { "epoch": 0.27, "learning_rate": 9.908745491492669e-08, "logits/chosen": -2.352252721786499, "logits/rejected": -2.349414348602295, "logps/chosen": -4.209994316101074, "logps/rejected": -8.492484092712402, "loss": 0.6197, "rewards/accuracies": 1.0, "rewards/chosen": 0.42738983035087585, "rewards/margins": 0.2587862014770508, "rewards/rejected": 0.16860361397266388, "step": 148 }, { "epoch": 0.27, "learning_rate": 9.906879484623449e-08, "logits/chosen": -2.2259669303894043, "logits/rejected": -2.2215383052825928, "logps/chosen": -3.8469932079315186, "logps/rejected": -4.1901960372924805, "loss": 0.7971, "rewards/accuracies": 0.0, "rewards/chosen": 0.36184313893318176, "rewards/margins": -0.038312315940856934, "rewards/rejected": 0.4001554548740387, "step": 149 }, { "epoch": 0.27, "learning_rate": 9.904994771238182e-08, "logits/chosen": -2.4793896675109863, "logits/rejected": -2.4789693355560303, "logps/chosen": -5.650973320007324, "logps/rejected": -8.745323181152344, "loss": 0.6852, "rewards/accuracies": 1.0, "rewards/chosen": 0.41636475920677185, "rewards/margins": 0.01720494031906128, "rewards/rejected": 0.39915981888771057, "step": 150 }, { "epoch": 0.27, "learning_rate": 9.903091358521969e-08, "logits/chosen": -2.4204964637756348, "logits/rejected": -2.3953449726104736, "logps/chosen": -5.604249954223633, "logps/rejected": 0.0, "loss": 0.5293, "rewards/accuracies": 1.0, "rewards/chosen": 0.4445175230503082, "rewards/margins": 0.4445175230503082, "rewards/rejected": 0.0, "step": 151 }, { "epoch": 0.27, "learning_rate": 9.901169253731196e-08, "logits/chosen": -2.391305446624756, "logits/rejected": -2.3083369731903076, "logps/chosen": -29.465473175048828, "logps/rejected": -4.326236724853516, "loss": 0.693, "rewards/accuracies": 0.0, "rewards/chosen": 0.17055892944335938, "rewards/margins": -0.14043298363685608, "rewards/rejected": 0.31099191308021545, "step": 152 }, { "epoch": 0.28, "learning_rate": 9.899228464193509e-08, "logits/chosen": -2.2385663986206055, "logits/rejected": -2.2421391010284424, "logps/chosen": -5.2373247146606445, "logps/rejected": -5.835893630981445, "loss": 0.8604, "rewards/accuracies": 0.0, "rewards/chosen": 0.42703601717948914, "rewards/margins": -0.028072834014892578, "rewards/rejected": 0.4551088511943817, "step": 153 }, { "epoch": 0.28, "learning_rate": 9.897268997307793e-08, "logits/chosen": -2.3514463901519775, "logits/rejected": -2.3626067638397217, "logps/chosen": -4.751548767089844, "logps/rejected": -2.571354389190674, "loss": 0.7041, "rewards/accuracies": 0.0, "rewards/chosen": 0.3167552053928375, "rewards/margins": -0.04544869065284729, "rewards/rejected": 0.3622038960456848, "step": 154 }, { "epoch": 0.28, "learning_rate": 9.895290860544128e-08, "logits/chosen": -2.475464344024658, "logits/rejected": -2.475130081176758, "logps/chosen": -4.0883402824401855, "logps/rejected": -3.203341007232666, "loss": 0.7025, "rewards/accuracies": 0.0, "rewards/chosen": 0.3168817162513733, "rewards/margins": -0.029966741800308228, "rewards/rejected": 0.3468484580516815, "step": 155 }, { "epoch": 0.28, "learning_rate": 9.893294061443771e-08, "logits/chosen": -2.444441795349121, "logits/rejected": -2.4293289184570312, "logps/chosen": -3.0139830112457275, "logps/rejected": 0.0, "loss": 0.601, "rewards/accuracies": 1.0, "rewards/chosen": 0.4825661778450012, "rewards/margins": 0.4825661778450012, "rewards/rejected": 0.0, "step": 156 }, { "epoch": 0.28, "learning_rate": 9.89127860761913e-08, "logits/chosen": -2.2741613388061523, "logits/rejected": -2.3446784019470215, "logps/chosen": -4.556302070617676, "logps/rejected": -29.802183151245117, "loss": 0.5754, "rewards/accuracies": 1.0, "rewards/chosen": 0.3223261833190918, "rewards/margins": 0.3490138053894043, "rewards/rejected": -0.0266876220703125, "step": 157 }, { "epoch": 0.29, "learning_rate": 9.889244506753727e-08, "logits/chosen": -2.3337795734405518, "logits/rejected": -2.3238365650177, "logps/chosen": -5.901487350463867, "logps/rejected": -9.100683212280273, "loss": 0.7133, "rewards/accuracies": 0.0, "rewards/chosen": 0.34157106280326843, "rewards/margins": -0.02224433422088623, "rewards/rejected": 0.36381539702415466, "step": 158 }, { "epoch": 0.29, "learning_rate": 9.887191766602172e-08, "logits/chosen": -2.360607147216797, "logits/rejected": -2.3642022609710693, "logps/chosen": -5.986814498901367, "logps/rejected": -3.8630101680755615, "loss": 0.5095, "rewards/accuracies": 1.0, "rewards/chosen": 0.586063802242279, "rewards/margins": 0.11051294207572937, "rewards/rejected": 0.4755508601665497, "step": 159 }, { "epoch": 0.29, "learning_rate": 9.885120394990134e-08, "logits/chosen": -2.307889699935913, "logits/rejected": -2.3055551052093506, "logps/chosen": -6.351358413696289, "logps/rejected": -7.088263511657715, "loss": 0.5476, "rewards/accuracies": 1.0, "rewards/chosen": 0.4531879425048828, "rewards/margins": 0.044666558504104614, "rewards/rejected": 0.4085213840007782, "step": 160 }, { "epoch": 0.29, "learning_rate": 9.883030399814313e-08, "logits/chosen": -2.3227860927581787, "logits/rejected": -2.4040451049804688, "logps/chosen": -6.825047016143799, "logps/rejected": -39.92510986328125, "loss": 0.7133, "rewards/accuracies": 1.0, "rewards/chosen": 0.48374316096305847, "rewards/margins": 0.5599234104156494, "rewards/rejected": -0.07618027180433273, "step": 161 }, { "epoch": 0.29, "learning_rate": 9.880921789042405e-08, "logits/chosen": -2.2564949989318848, "logits/rejected": -2.2463083267211914, "logps/chosen": -13.301008224487305, "logps/rejected": -2.804979085922241, "loss": 0.6952, "rewards/accuracies": 0.0, "rewards/chosen": 0.45237332582473755, "rewards/margins": -0.03626468777656555, "rewards/rejected": 0.4886380136013031, "step": 162 }, { "epoch": 0.29, "learning_rate": 9.878794570713074e-08, "logits/chosen": -2.40518856048584, "logits/rejected": -2.408529281616211, "logps/chosen": -4.565448760986328, "logps/rejected": -3.423964023590088, "loss": 0.6752, "rewards/accuracies": 0.0, "rewards/chosen": 0.41735488176345825, "rewards/margins": -0.03257623314857483, "rewards/rejected": 0.4499311149120331, "step": 163 }, { "epoch": 0.3, "learning_rate": 9.876648752935923e-08, "logits/chosen": -2.4037322998046875, "logits/rejected": -2.4054551124572754, "logps/chosen": -8.090978622436523, "logps/rejected": -3.9173922538757324, "loss": 0.6837, "rewards/accuracies": 0.0, "rewards/chosen": 0.42444878816604614, "rewards/margins": -0.10932201147079468, "rewards/rejected": 0.5337707996368408, "step": 164 }, { "epoch": 0.3, "learning_rate": 9.874484343891462e-08, "logits/chosen": -2.278142213821411, "logits/rejected": -2.2972264289855957, "logps/chosen": -13.878495216369629, "logps/rejected": -12.490602493286133, "loss": 0.6528, "rewards/accuracies": 1.0, "rewards/chosen": 0.27225637435913086, "rewards/margins": 0.10492353141307831, "rewards/rejected": 0.16733284294605255, "step": 165 }, { "epoch": 0.3, "learning_rate": 9.872301351831079e-08, "logits/chosen": -2.414138078689575, "logits/rejected": -2.402587413787842, "logps/chosen": -2.205801010131836, "logps/rejected": -14.446975708007812, "loss": 0.5611, "rewards/accuracies": 1.0, "rewards/chosen": 0.3822474479675293, "rewards/margins": 0.3392859399318695, "rewards/rejected": 0.04296150431036949, "step": 166 }, { "epoch": 0.3, "learning_rate": 9.870099785077e-08, "logits/chosen": -2.37672758102417, "logits/rejected": -2.3982558250427246, "logps/chosen": -3.158907175064087, "logps/rejected": -28.501371383666992, "loss": 0.6459, "rewards/accuracies": 1.0, "rewards/chosen": 0.2982906699180603, "rewards/margins": 0.22389644384384155, "rewards/rejected": 0.07439422607421875, "step": 167 }, { "epoch": 0.3, "learning_rate": 9.867879652022266e-08, "logits/chosen": -2.4120256900787354, "logits/rejected": -2.4160079956054688, "logps/chosen": -3.532386302947998, "logps/rejected": -5.985978603363037, "loss": 0.6756, "rewards/accuracies": 1.0, "rewards/chosen": 0.4264780580997467, "rewards/margins": 0.10504484176635742, "rewards/rejected": 0.3214332163333893, "step": 168 }, { "epoch": 0.31, "learning_rate": 9.865640961130701e-08, "logits/chosen": -2.3550283908843994, "logits/rejected": -2.3676555156707764, "logps/chosen": -12.905096054077148, "logps/rejected": -15.535593032836914, "loss": 0.6113, "rewards/accuracies": 1.0, "rewards/chosen": 0.36911851167678833, "rewards/margins": 0.13777314126491547, "rewards/rejected": 0.23134537041187286, "step": 169 }, { "epoch": 0.31, "learning_rate": 9.863383720936876e-08, "logits/chosen": -2.410531997680664, "logits/rejected": -2.414226770401001, "logps/chosen": -2.008216619491577, "logps/rejected": -6.75429630279541, "loss": 0.5459, "rewards/accuracies": 1.0, "rewards/chosen": 0.6186114549636841, "rewards/margins": 0.18225565552711487, "rewards/rejected": 0.4363557994365692, "step": 170 }, { "epoch": 0.31, "learning_rate": 9.861107940046074e-08, "logits/chosen": -2.41157603263855, "logits/rejected": -2.4086058139801025, "logps/chosen": -2.225130558013916, "logps/rejected": -12.512733459472656, "loss": 0.677, "rewards/accuracies": 1.0, "rewards/chosen": 0.548419177532196, "rewards/margins": 0.07907178997993469, "rewards/rejected": 0.46934738755226135, "step": 171 }, { "epoch": 0.31, "learning_rate": 9.858813627134266e-08, "logits/chosen": -2.386613130569458, "logits/rejected": -2.393176317214966, "logps/chosen": -4.812203407287598, "logps/rejected": -3.3035030364990234, "loss": 0.5878, "rewards/accuracies": 0.0, "rewards/chosen": 0.496614933013916, "rewards/margins": -0.014029979705810547, "rewards/rejected": 0.5106449127197266, "step": 172 }, { "epoch": 0.31, "learning_rate": 9.856500790948067e-08, "logits/chosen": -2.299189805984497, "logits/rejected": -2.3573412895202637, "logps/chosen": -5.376588344573975, "logps/rejected": -12.70197868347168, "loss": 0.6472, "rewards/accuracies": 1.0, "rewards/chosen": 0.6443411707878113, "rewards/margins": 0.22136905789375305, "rewards/rejected": 0.4229721128940582, "step": 173 }, { "epoch": 0.31, "learning_rate": 9.854169440304714e-08, "logits/chosen": -2.3846664428710938, "logits/rejected": -2.3708317279815674, "logps/chosen": -4.453742504119873, "logps/rejected": -9.683510780334473, "loss": 0.6206, "rewards/accuracies": 1.0, "rewards/chosen": 0.3926750719547272, "rewards/margins": 0.1098126769065857, "rewards/rejected": 0.2828623950481415, "step": 174 }, { "epoch": 0.32, "learning_rate": 9.851819584092024e-08, "logits/chosen": -2.453169107437134, "logits/rejected": -2.4475655555725098, "logps/chosen": -1.7248952388763428, "logps/rejected": -7.463311195373535, "loss": 0.6449, "rewards/accuracies": 1.0, "rewards/chosen": 0.35752037167549133, "rewards/margins": 0.06157785654067993, "rewards/rejected": 0.2959425151348114, "step": 175 }, { "epoch": 0.32, "learning_rate": 9.849451231268364e-08, "logits/chosen": -2.4077208042144775, "logits/rejected": -2.4052789211273193, "logps/chosen": -2.0579617023468018, "logps/rejected": -7.606479167938232, "loss": 0.6445, "rewards/accuracies": 0.0, "rewards/chosen": 0.39832961559295654, "rewards/margins": -0.1471152901649475, "rewards/rejected": 0.545444905757904, "step": 176 }, { "epoch": 0.32, "learning_rate": 9.847064390862611e-08, "logits/chosen": -2.425203561782837, "logits/rejected": -2.4318995475769043, "logps/chosen": -3.70802640914917, "logps/rejected": -2.4776244163513184, "loss": 0.7135, "rewards/accuracies": 0.0, "rewards/chosen": 0.40364891290664673, "rewards/margins": -0.038528233766555786, "rewards/rejected": 0.4421771466732025, "step": 177 }, { "epoch": 0.32, "learning_rate": 9.844659071974132e-08, "logits/chosen": -2.3530049324035645, "logits/rejected": -2.364483118057251, "logps/chosen": -14.290950775146484, "logps/rejected": -12.987921714782715, "loss": 0.6413, "rewards/accuracies": 1.0, "rewards/chosen": 0.44115373492240906, "rewards/margins": 0.13121387362480164, "rewards/rejected": 0.3099398612976074, "step": 178 }, { "epoch": 0.32, "learning_rate": 9.842235283772728e-08, "logits/chosen": -2.2652320861816406, "logits/rejected": -2.2559478282928467, "logps/chosen": -3.8832685947418213, "logps/rejected": -9.988810539245605, "loss": 0.6756, "rewards/accuracies": 0.0, "rewards/chosen": 0.4603688418865204, "rewards/margins": -0.014547228813171387, "rewards/rejected": 0.4749160706996918, "step": 179 }, { "epoch": 0.33, "learning_rate": 9.83979303549862e-08, "logits/chosen": -2.3667478561401367, "logits/rejected": -2.368976593017578, "logps/chosen": -18.137113571166992, "logps/rejected": -3.352856397628784, "loss": 0.854, "rewards/accuracies": 0.0, "rewards/chosen": 0.33309632539749146, "rewards/margins": -0.22045964002609253, "rewards/rejected": 0.553555965423584, "step": 180 }, { "epoch": 0.33, "learning_rate": 9.837332336462399e-08, "logits/chosen": -2.4608213901519775, "logits/rejected": -2.468301296234131, "logps/chosen": -5.488571643829346, "logps/rejected": -5.219368934631348, "loss": 0.7867, "rewards/accuracies": 1.0, "rewards/chosen": 0.5994060039520264, "rewards/margins": 0.06812483072280884, "rewards/rejected": 0.5312811732292175, "step": 181 }, { "epoch": 0.33, "learning_rate": 9.834853196044999e-08, "logits/chosen": -2.444518804550171, "logits/rejected": -2.3832757472991943, "logps/chosen": -27.210079193115234, "logps/rejected": -3.131448268890381, "loss": 1.0935, "rewards/accuracies": 0.0, "rewards/chosen": -0.7457588315010071, "rewards/margins": -1.2053353786468506, "rewards/rejected": 0.4595766067504883, "step": 182 }, { "epoch": 0.33, "learning_rate": 9.832355623697656e-08, "logits/chosen": -2.4127964973449707, "logits/rejected": -2.412123441696167, "logps/chosen": -3.578247547149658, "logps/rejected": -2.1123740673065186, "loss": 0.7128, "rewards/accuracies": 1.0, "rewards/chosen": 0.5269351601600647, "rewards/margins": 0.14371368288993835, "rewards/rejected": 0.38322147727012634, "step": 183 }, { "epoch": 0.33, "learning_rate": 9.829839628941874e-08, "logits/chosen": -2.2883358001708984, "logits/rejected": -2.2725131511688232, "logps/chosen": -16.095273971557617, "logps/rejected": -5.199501991271973, "loss": 0.5921, "rewards/accuracies": 0.0, "rewards/chosen": 0.3129991590976715, "rewards/margins": -0.06156128644943237, "rewards/rejected": 0.3745604455471039, "step": 184 }, { "epoch": 0.33, "learning_rate": 9.827305221369392e-08, "logits/chosen": -2.363656759262085, "logits/rejected": -2.36694073677063, "logps/chosen": -2.856088876724243, "logps/rejected": -13.62873649597168, "loss": 0.5444, "rewards/accuracies": 1.0, "rewards/chosen": 0.4118243157863617, "rewards/margins": 0.2856067419052124, "rewards/rejected": 0.1262175589799881, "step": 185 }, { "epoch": 0.34, "learning_rate": 9.824752410642139e-08, "logits/chosen": -2.438028573989868, "logits/rejected": -2.437992572784424, "logps/chosen": -4.2270426750183105, "logps/rejected": -4.512109279632568, "loss": 0.7501, "rewards/accuracies": 1.0, "rewards/chosen": 0.7199585437774658, "rewards/margins": 0.23100653290748596, "rewards/rejected": 0.48895201086997986, "step": 186 }, { "epoch": 0.34, "learning_rate": 9.822181206492208e-08, "logits/chosen": -2.4233713150024414, "logits/rejected": -2.42000150680542, "logps/chosen": -12.934538841247559, "logps/rejected": -11.114323616027832, "loss": 0.6498, "rewards/accuracies": 0.0, "rewards/chosen": 0.333803653717041, "rewards/margins": -0.047032564878463745, "rewards/rejected": 0.38083621859550476, "step": 187 }, { "epoch": 0.34, "learning_rate": 9.81959161872181e-08, "logits/chosen": -2.3940794467926025, "logits/rejected": -2.4153497219085693, "logps/chosen": -8.050779342651367, "logps/rejected": -16.598194122314453, "loss": 0.8298, "rewards/accuracies": 0.0, "rewards/chosen": 0.33904552459716797, "rewards/margins": -0.16535896062850952, "rewards/rejected": 0.5044044852256775, "step": 188 }, { "epoch": 0.34, "learning_rate": 9.816983657203241e-08, "logits/chosen": -2.2695600986480713, "logits/rejected": -2.27227520942688, "logps/chosen": -4.070262908935547, "logps/rejected": -13.427098274230957, "loss": 0.6181, "rewards/accuracies": 1.0, "rewards/chosen": 0.48199978470802307, "rewards/margins": 0.16659194231033325, "rewards/rejected": 0.3154078423976898, "step": 189 }, { "epoch": 0.34, "learning_rate": 9.814357331878843e-08, "logits/chosen": -2.3518834114074707, "logits/rejected": -2.3256943225860596, "logps/chosen": -1.8952242136001587, "logps/rejected": 0.0, "loss": 0.5826, "rewards/accuracies": 1.0, "rewards/chosen": 0.48519864678382874, "rewards/margins": 0.48519864678382874, "rewards/rejected": 0.0, "step": 190 }, { "epoch": 0.35, "learning_rate": 9.811712652760965e-08, "logits/chosen": -2.3677690029144287, "logits/rejected": -2.366133689880371, "logps/chosen": -3.5598487854003906, "logps/rejected": -16.020605087280273, "loss": 0.5618, "rewards/accuracies": 1.0, "rewards/chosen": 0.648601233959198, "rewards/margins": 0.48247650265693665, "rewards/rejected": 0.16612473130226135, "step": 191 }, { "epoch": 0.35, "learning_rate": 9.809049629931929e-08, "logits/chosen": -2.3101651668548584, "logits/rejected": -2.3220880031585693, "logps/chosen": -3.2479608058929443, "logps/rejected": -12.124040603637695, "loss": 0.7175, "rewards/accuracies": 1.0, "rewards/chosen": 0.46434852480888367, "rewards/margins": 0.23431426286697388, "rewards/rejected": 0.2300342619419098, "step": 192 }, { "epoch": 0.35, "learning_rate": 9.806368273543987e-08, "logits/chosen": -2.2685556411743164, "logits/rejected": -2.259854793548584, "logps/chosen": -6.325167655944824, "logps/rejected": -3.508134126663208, "loss": 0.7502, "rewards/accuracies": 0.0, "rewards/chosen": 0.5493112802505493, "rewards/margins": -0.07594287395477295, "rewards/rejected": 0.6252541542053223, "step": 193 }, { "epoch": 0.35, "learning_rate": 9.803668593819286e-08, "logits/chosen": -2.420024871826172, "logits/rejected": -2.39699387550354, "logps/chosen": -10.086426734924316, "logps/rejected": -9.8082275390625, "loss": 0.6265, "rewards/accuracies": 1.0, "rewards/chosen": 0.7138819694519043, "rewards/margins": 0.20694702863693237, "rewards/rejected": 0.5069349408149719, "step": 194 }, { "epoch": 0.35, "learning_rate": 9.800950601049822e-08, "logits/chosen": -2.4044206142425537, "logits/rejected": -2.357828140258789, "logps/chosen": -25.871540069580078, "logps/rejected": -2.6921136379241943, "loss": 0.8422, "rewards/accuracies": 0.0, "rewards/chosen": 0.026157760992646217, "rewards/margins": -0.4228534698486328, "rewards/rejected": 0.4490112364292145, "step": 195 }, { "epoch": 0.35, "learning_rate": 9.798214305597412e-08, "logits/chosen": -2.362349271774292, "logits/rejected": -2.3680315017700195, "logps/chosen": -3.5650649070739746, "logps/rejected": -2.108748435974121, "loss": 0.6777, "rewards/accuracies": 1.0, "rewards/chosen": 0.5900177359580994, "rewards/margins": 0.0707242488861084, "rewards/rejected": 0.519293487071991, "step": 196 }, { "epoch": 0.36, "learning_rate": 9.795459717893648e-08, "logits/chosen": -2.2649645805358887, "logits/rejected": -2.264355182647705, "logps/chosen": -7.999940872192383, "logps/rejected": -11.440999031066895, "loss": 0.5919, "rewards/accuracies": 1.0, "rewards/chosen": 0.48198938369750977, "rewards/margins": 0.2174072265625, "rewards/rejected": 0.26458215713500977, "step": 197 }, { "epoch": 0.36, "learning_rate": 9.79268684843985e-08, "logits/chosen": -2.305682420730591, "logits/rejected": -2.3166778087615967, "logps/chosen": -2.496124267578125, "logps/rejected": -3.350456714630127, "loss": 0.6942, "rewards/accuracies": 1.0, "rewards/chosen": 0.3889945149421692, "rewards/margins": 0.012853950262069702, "rewards/rejected": 0.3761405646800995, "step": 198 }, { "epoch": 0.36, "learning_rate": 9.789895707807046e-08, "logits/chosen": -2.3908751010894775, "logits/rejected": -2.3687708377838135, "logps/chosen": -11.68734359741211, "logps/rejected": 0.0, "loss": 0.5823, "rewards/accuracies": 1.0, "rewards/chosen": 0.49052849411964417, "rewards/margins": 0.49052849411964417, "rewards/rejected": 0.0, "step": 199 }, { "epoch": 0.36, "learning_rate": 9.787086306635907e-08, "logits/chosen": -2.2753500938415527, "logits/rejected": -2.2799360752105713, "logps/chosen": -4.262354373931885, "logps/rejected": -5.169022560119629, "loss": 0.7531, "rewards/accuracies": 0.0, "rewards/chosen": 0.44593796133995056, "rewards/margins": -0.18201932311058044, "rewards/rejected": 0.627957284450531, "step": 200 }, { "epoch": 0.36, "learning_rate": 9.784258655636728e-08, "logits/chosen": -2.3017759323120117, "logits/rejected": -2.2979040145874023, "logps/chosen": -12.339349746704102, "logps/rejected": -3.139998197555542, "loss": 0.5979, "rewards/accuracies": 0.0, "rewards/chosen": 0.5362839102745056, "rewards/margins": -0.12167108058929443, "rewards/rejected": 0.6579549908638, "step": 201 }, { "epoch": 0.37, "learning_rate": 9.781412765589372e-08, "logits/chosen": -2.319573163986206, "logits/rejected": -2.319181203842163, "logps/chosen": -2.8181426525115967, "logps/rejected": -5.347375869750977, "loss": 0.5107, "rewards/accuracies": 1.0, "rewards/chosen": 0.47544699907302856, "rewards/margins": 0.08049994707107544, "rewards/rejected": 0.3949470520019531, "step": 202 }, { "epoch": 0.37, "learning_rate": 9.778548647343236e-08, "logits/chosen": -2.2657761573791504, "logits/rejected": -2.2655460834503174, "logps/chosen": -2.92142391204834, "logps/rejected": -1.5114569664001465, "loss": 0.5086, "rewards/accuracies": 1.0, "rewards/chosen": 0.4651229977607727, "rewards/margins": 0.09360605478286743, "rewards/rejected": 0.3715169429779053, "step": 203 }, { "epoch": 0.37, "learning_rate": 9.775666311817211e-08, "logits/chosen": -2.316037893295288, "logits/rejected": -2.316662311553955, "logps/chosen": -5.172751426696777, "logps/rejected": -4.309878349304199, "loss": 0.6628, "rewards/accuracies": 1.0, "rewards/chosen": 0.7170484662055969, "rewards/margins": 0.19556045532226562, "rewards/rejected": 0.5214880108833313, "step": 204 }, { "epoch": 0.37, "learning_rate": 9.772765769999635e-08, "logits/chosen": -2.357865333557129, "logits/rejected": -2.3606655597686768, "logps/chosen": -5.007294178009033, "logps/rejected": -2.3182528018951416, "loss": 0.531, "rewards/accuracies": 1.0, "rewards/chosen": 0.5927927494049072, "rewards/margins": 0.09927335381507874, "rewards/rejected": 0.4935193955898285, "step": 205 }, { "epoch": 0.37, "learning_rate": 9.769847032948257e-08, "logits/chosen": -2.4332661628723145, "logits/rejected": -2.4328675270080566, "logps/chosen": -4.2821221351623535, "logps/rejected": -14.0809907913208, "loss": 0.6863, "rewards/accuracies": 1.0, "rewards/chosen": 0.6438777446746826, "rewards/margins": 0.0275612473487854, "rewards/rejected": 0.6163164973258972, "step": 206 }, { "epoch": 0.37, "learning_rate": 9.766910111790189e-08, "logits/chosen": -2.3419671058654785, "logits/rejected": -2.3435606956481934, "logps/chosen": -6.314942359924316, "logps/rejected": -2.366579532623291, "loss": 0.6782, "rewards/accuracies": 1.0, "rewards/chosen": 0.5737418532371521, "rewards/margins": 0.1344122290611267, "rewards/rejected": 0.4393296241760254, "step": 207 }, { "epoch": 0.38, "learning_rate": 9.763955017721866e-08, "logits/chosen": -2.3690173625946045, "logits/rejected": -2.363070487976074, "logps/chosen": -3.151683807373047, "logps/rejected": -6.032668113708496, "loss": 0.6699, "rewards/accuracies": 1.0, "rewards/chosen": 0.6605873107910156, "rewards/margins": 0.33845701813697815, "rewards/rejected": 0.3221302926540375, "step": 208 }, { "epoch": 0.38, "learning_rate": 9.760981762009004e-08, "logits/chosen": -2.2908976078033447, "logits/rejected": -2.3204686641693115, "logps/chosen": -6.1388840675354, "logps/rejected": -18.518539428710938, "loss": 0.4559, "rewards/accuracies": 1.0, "rewards/chosen": 0.5197547674179077, "rewards/margins": 0.1196925938129425, "rewards/rejected": 0.4000621736049652, "step": 209 }, { "epoch": 0.38, "learning_rate": 9.75799035598656e-08, "logits/chosen": -2.30985426902771, "logits/rejected": -2.3072938919067383, "logps/chosen": -12.416911125183105, "logps/rejected": -2.809488296508789, "loss": 0.5335, "rewards/accuracies": 1.0, "rewards/chosen": 0.6988446116447449, "rewards/margins": 0.10890775918960571, "rewards/rejected": 0.5899368524551392, "step": 210 }, { "epoch": 0.38, "learning_rate": 9.754980811058682e-08, "logits/chosen": -2.2771735191345215, "logits/rejected": -2.281643867492676, "logps/chosen": -11.843945503234863, "logps/rejected": -4.230316162109375, "loss": 0.709, "rewards/accuracies": 0.0, "rewards/chosen": 0.5043549537658691, "rewards/margins": -0.06239902973175049, "rewards/rejected": 0.5667539834976196, "step": 211 }, { "epoch": 0.38, "learning_rate": 9.75195313869867e-08, "logits/chosen": -2.396153688430786, "logits/rejected": -2.3879387378692627, "logps/chosen": -3.069345712661743, "logps/rejected": -2.7361702919006348, "loss": 0.6593, "rewards/accuracies": 1.0, "rewards/chosen": 0.6098341941833496, "rewards/margins": 0.19114980101585388, "rewards/rejected": 0.4186843931674957, "step": 212 }, { "epoch": 0.39, "learning_rate": 9.748907350448931e-08, "logits/chosen": -2.377955675125122, "logits/rejected": -2.387160539627075, "logps/chosen": -4.6148762702941895, "logps/rejected": -3.826694965362549, "loss": 0.5332, "rewards/accuracies": 1.0, "rewards/chosen": 0.6288470029830933, "rewards/margins": 0.14090806245803833, "rewards/rejected": 0.48793894052505493, "step": 213 }, { "epoch": 0.39, "learning_rate": 9.745843457920936e-08, "logits/chosen": -2.283863067626953, "logits/rejected": -2.3535959720611572, "logps/chosen": -3.1551995277404785, "logps/rejected": -25.762279510498047, "loss": 0.7261, "rewards/accuracies": 1.0, "rewards/chosen": 0.5232486128807068, "rewards/margins": 0.31826967000961304, "rewards/rejected": 0.20497894287109375, "step": 214 }, { "epoch": 0.39, "learning_rate": 9.742761472795173e-08, "logits/chosen": -2.384920358657837, "logits/rejected": -2.3028035163879395, "logps/chosen": -14.421634674072266, "logps/rejected": -2.6865596771240234, "loss": 0.7659, "rewards/accuracies": 0.0, "rewards/chosen": 0.23195305466651917, "rewards/margins": -0.4570184051990509, "rewards/rejected": 0.6889714598655701, "step": 215 }, { "epoch": 0.39, "learning_rate": 9.73966140682111e-08, "logits/chosen": -2.2961559295654297, "logits/rejected": -2.3011720180511475, "logps/chosen": -4.505486011505127, "logps/rejected": -2.48882794380188, "loss": 0.7184, "rewards/accuracies": 0.0, "rewards/chosen": 0.5505902171134949, "rewards/margins": -0.13297808170318604, "rewards/rejected": 0.6835682988166809, "step": 216 }, { "epoch": 0.39, "learning_rate": 9.736543271817137e-08, "logits/chosen": -2.3465754985809326, "logits/rejected": -2.335463285446167, "logps/chosen": -11.000691413879395, "logps/rejected": 0.0, "loss": 0.7869, "rewards/accuracies": 1.0, "rewards/chosen": 0.4761538505554199, "rewards/margins": 0.4761538505554199, "rewards/rejected": 0.0, "step": 217 }, { "epoch": 0.39, "learning_rate": 9.733407079670533e-08, "logits/chosen": -2.198773145675659, "logits/rejected": -2.176098585128784, "logps/chosen": -10.78473949432373, "logps/rejected": 0.0, "loss": 0.6175, "rewards/accuracies": 1.0, "rewards/chosen": 0.5092892646789551, "rewards/margins": 0.5092892646789551, "rewards/rejected": 0.0, "step": 218 }, { "epoch": 0.4, "learning_rate": 9.730252842337416e-08, "logits/chosen": -2.3975839614868164, "logits/rejected": -2.4985947608947754, "logps/chosen": -3.4547674655914307, "logps/rejected": -47.430755615234375, "loss": 0.6845, "rewards/accuracies": 1.0, "rewards/chosen": 0.443173885345459, "rewards/margins": 0.455655574798584, "rewards/rejected": -0.012481689453125, "step": 219 }, { "epoch": 0.4, "learning_rate": 9.7270805718427e-08, "logits/chosen": -2.3636062145233154, "logits/rejected": -2.372788667678833, "logps/chosen": -2.21621036529541, "logps/rejected": -5.140174388885498, "loss": 0.8656, "rewards/accuracies": 0.0, "rewards/chosen": 0.5123258829116821, "rewards/margins": -0.07434695959091187, "rewards/rejected": 0.586672842502594, "step": 220 }, { "epoch": 0.4, "learning_rate": 9.72389028028004e-08, "logits/chosen": -2.405332088470459, "logits/rejected": -2.3981189727783203, "logps/chosen": -5.725787162780762, "logps/rejected": -6.836656093597412, "loss": 0.6741, "rewards/accuracies": 1.0, "rewards/chosen": 0.6511713862419128, "rewards/margins": 0.017770588397979736, "rewards/rejected": 0.6334007978439331, "step": 221 }, { "epoch": 0.4, "learning_rate": 9.7206819798118e-08, "logits/chosen": -2.4087066650390625, "logits/rejected": -2.4361047744750977, "logps/chosen": -2.8642497062683105, "logps/rejected": -11.070446968078613, "loss": 0.6418, "rewards/accuracies": 1.0, "rewards/chosen": 0.6139791011810303, "rewards/margins": 0.21889060735702515, "rewards/rejected": 0.3950884938240051, "step": 222 }, { "epoch": 0.4, "learning_rate": 9.717455682668995e-08, "logits/chosen": -2.3823161125183105, "logits/rejected": -2.405336380004883, "logps/chosen": -3.9398422241210938, "logps/rejected": -8.086551666259766, "loss": 0.5346, "rewards/accuracies": 1.0, "rewards/chosen": 0.7151027917861938, "rewards/margins": 0.15272611379623413, "rewards/rejected": 0.5623766779899597, "step": 223 }, { "epoch": 0.41, "learning_rate": 9.714211401151253e-08, "logits/chosen": -2.289167642593384, "logits/rejected": -2.3050119876861572, "logps/chosen": -2.8449316024780273, "logps/rejected": -10.958911895751953, "loss": 0.6473, "rewards/accuracies": 1.0, "rewards/chosen": 0.6608832478523254, "rewards/margins": 0.3074800670146942, "rewards/rejected": 0.3534031808376312, "step": 224 }, { "epoch": 0.41, "learning_rate": 9.710949147626759e-08, "logits/chosen": -2.377540111541748, "logits/rejected": -2.381276845932007, "logps/chosen": -3.7909631729125977, "logps/rejected": -3.619978666305542, "loss": 0.689, "rewards/accuracies": 1.0, "rewards/chosen": 0.5970417261123657, "rewards/margins": 0.01670163869857788, "rewards/rejected": 0.5803400874137878, "step": 225 }, { "epoch": 0.41, "learning_rate": 9.707668934532215e-08, "logits/chosen": -2.3004395961761475, "logits/rejected": -2.326728343963623, "logps/chosen": -4.393167495727539, "logps/rejected": -25.808460235595703, "loss": 0.6175, "rewards/accuracies": 1.0, "rewards/chosen": 0.4972420632839203, "rewards/margins": 0.2239338755607605, "rewards/rejected": 0.2733081877231598, "step": 226 }, { "epoch": 0.41, "learning_rate": 9.70437077437279e-08, "logits/chosen": -2.4296131134033203, "logits/rejected": -2.4184794425964355, "logps/chosen": -4.946038246154785, "logps/rejected": -2.419128656387329, "loss": 0.7163, "rewards/accuracies": 0.0, "rewards/chosen": 0.5221438407897949, "rewards/margins": -0.09050703048706055, "rewards/rejected": 0.6126508712768555, "step": 227 }, { "epoch": 0.41, "learning_rate": 9.701054679722074e-08, "logits/chosen": -2.168962001800537, "logits/rejected": -2.1681630611419678, "logps/chosen": -1.843073844909668, "logps/rejected": -2.0889973640441895, "loss": 0.7161, "rewards/accuracies": 0.0, "rewards/chosen": 0.43019938468933105, "rewards/margins": -0.012119382619857788, "rewards/rejected": 0.44231876730918884, "step": 228 }, { "epoch": 0.41, "learning_rate": 9.697720663222028e-08, "logits/chosen": -2.34493088722229, "logits/rejected": -2.3392908573150635, "logps/chosen": -1.197766900062561, "logps/rejected": -3.6388256549835205, "loss": 0.7402, "rewards/accuracies": 0.0, "rewards/chosen": 0.49442192912101746, "rewards/margins": -0.16529175639152527, "rewards/rejected": 0.6597136855125427, "step": 229 }, { "epoch": 0.42, "learning_rate": 9.694368737582937e-08, "logits/chosen": -2.4023842811584473, "logits/rejected": -2.509459972381592, "logps/chosen": -2.5120906829833984, "logps/rejected": -41.712669372558594, "loss": 0.4619, "rewards/accuracies": 1.0, "rewards/chosen": 0.5400682687759399, "rewards/margins": 0.49211829900741577, "rewards/rejected": 0.047949980944395065, "step": 230 }, { "epoch": 0.42, "learning_rate": 9.690998915583357e-08, "logits/chosen": -2.38896107673645, "logits/rejected": -2.389036178588867, "logps/chosen": -4.858249664306641, "logps/rejected": -2.6391477584838867, "loss": 0.6197, "rewards/accuracies": 1.0, "rewards/chosen": 0.6734529733657837, "rewards/margins": 0.26664620637893677, "rewards/rejected": 0.4068067669868469, "step": 231 }, { "epoch": 0.42, "learning_rate": 9.687611210070077e-08, "logits/chosen": -2.3351898193359375, "logits/rejected": -2.3315327167510986, "logps/chosen": -4.172646999359131, "logps/rejected": -13.152446746826172, "loss": 0.5965, "rewards/accuracies": 1.0, "rewards/chosen": 0.7657180428504944, "rewards/margins": 0.5444610118865967, "rewards/rejected": 0.2212570160627365, "step": 232 }, { "epoch": 0.42, "learning_rate": 9.684205633958058e-08, "logits/chosen": -2.279714345932007, "logits/rejected": -2.3483688831329346, "logps/chosen": -3.406737804412842, "logps/rejected": -31.066646575927734, "loss": 0.6043, "rewards/accuracies": 1.0, "rewards/chosen": 0.5334436297416687, "rewards/margins": 0.27909526228904724, "rewards/rejected": 0.25434836745262146, "step": 233 }, { "epoch": 0.42, "learning_rate": 9.680782200230393e-08, "logits/chosen": -2.4653983116149902, "logits/rejected": -2.442166805267334, "logps/chosen": -1.746768832206726, "logps/rejected": 0.0, "loss": 0.5914, "rewards/accuracies": 1.0, "rewards/chosen": 0.48294907808303833, "rewards/margins": 0.48294907808303833, "rewards/rejected": 0.0, "step": 234 }, { "epoch": 0.42, "learning_rate": 9.67734092193825e-08, "logits/chosen": -2.2312092781066895, "logits/rejected": -2.239921808242798, "logps/chosen": -2.5859415531158447, "logps/rejected": -8.202956199645996, "loss": 0.6975, "rewards/accuracies": 0.0, "rewards/chosen": 0.6102782487869263, "rewards/margins": -0.14374703168869019, "rewards/rejected": 0.7540252804756165, "step": 235 }, { "epoch": 0.43, "learning_rate": 9.67388181220083e-08, "logits/chosen": -2.3784663677215576, "logits/rejected": -2.353731393814087, "logps/chosen": -3.9311068058013916, "logps/rejected": 0.0, "loss": 0.5568, "rewards/accuracies": 1.0, "rewards/chosen": 0.7978917956352234, "rewards/margins": 0.7978917956352234, "rewards/rejected": 0.0, "step": 236 }, { "epoch": 0.43, "learning_rate": 9.67040488420531e-08, "logits/chosen": -2.2506117820739746, "logits/rejected": -2.2414462566375732, "logps/chosen": -10.408443450927734, "logps/rejected": -11.268767356872559, "loss": 0.7624, "rewards/accuracies": 0.0, "rewards/chosen": 0.25402727723121643, "rewards/margins": -0.3897230327129364, "rewards/rejected": 0.6437503099441528, "step": 237 }, { "epoch": 0.43, "learning_rate": 9.666910151206796e-08, "logits/chosen": -2.4239087104797363, "logits/rejected": -2.4263439178466797, "logps/chosen": -3.2662320137023926, "logps/rejected": -2.693303346633911, "loss": 0.6989, "rewards/accuracies": 0.0, "rewards/chosen": 0.6600556969642639, "rewards/margins": -0.0065346360206604, "rewards/rejected": 0.6665903329849243, "step": 238 }, { "epoch": 0.43, "learning_rate": 9.663397626528271e-08, "logits/chosen": -2.5082831382751465, "logits/rejected": -2.5098297595977783, "logps/chosen": -3.2126166820526123, "logps/rejected": -11.020687103271484, "loss": 0.6495, "rewards/accuracies": 1.0, "rewards/chosen": 0.7067433595657349, "rewards/margins": 0.3211841881275177, "rewards/rejected": 0.38555917143821716, "step": 239 }, { "epoch": 0.43, "learning_rate": 9.65986732356055e-08, "logits/chosen": -2.3466625213623047, "logits/rejected": -2.389341115951538, "logps/chosen": -1.8188446760177612, "logps/rejected": -28.870891571044922, "loss": 0.5819, "rewards/accuracies": 1.0, "rewards/chosen": 0.49349308013916016, "rewards/margins": 0.18354585766792297, "rewards/rejected": 0.3099472224712372, "step": 240 }, { "epoch": 0.44, "learning_rate": 9.656319255762218e-08, "logits/chosen": -2.366067886352539, "logits/rejected": -2.3625266551971436, "logps/chosen": -3.3140501976013184, "logps/rejected": -10.113800048828125, "loss": 0.5757, "rewards/accuracies": 1.0, "rewards/chosen": 0.6645026803016663, "rewards/margins": 0.4466814398765564, "rewards/rejected": 0.21782122552394867, "step": 241 }, { "epoch": 0.44, "learning_rate": 9.652753436659589e-08, "logits/chosen": -2.395320415496826, "logits/rejected": -2.395087718963623, "logps/chosen": -2.127857208251953, "logps/rejected": -15.104784965515137, "loss": 0.6797, "rewards/accuracies": 0.0, "rewards/chosen": 0.5877832770347595, "rewards/margins": -0.18396592140197754, "rewards/rejected": 0.7717491984367371, "step": 242 }, { "epoch": 0.44, "learning_rate": 9.64916987984665e-08, "logits/chosen": -2.364956855773926, "logits/rejected": -2.3685266971588135, "logps/chosen": -2.124073028564453, "logps/rejected": -1.4010701179504395, "loss": 0.6729, "rewards/accuracies": 0.0, "rewards/chosen": 0.6825218200683594, "rewards/margins": -0.05531412363052368, "rewards/rejected": 0.7378359436988831, "step": 243 }, { "epoch": 0.44, "learning_rate": 9.645568598985008e-08, "logits/chosen": -2.397162675857544, "logits/rejected": -2.3982763290405273, "logps/chosen": -4.022350311279297, "logps/rejected": -4.354227066040039, "loss": 0.6342, "rewards/accuracies": 1.0, "rewards/chosen": 0.6240444183349609, "rewards/margins": 0.13859987258911133, "rewards/rejected": 0.4854445457458496, "step": 244 }, { "epoch": 0.44, "learning_rate": 9.641949607803841e-08, "logits/chosen": -2.3977997303009033, "logits/rejected": -2.403123140335083, "logps/chosen": -11.122123718261719, "logps/rejected": -7.510372638702393, "loss": 0.7112, "rewards/accuracies": 1.0, "rewards/chosen": 0.597411572933197, "rewards/margins": 0.11056944727897644, "rewards/rejected": 0.4868421256542206, "step": 245 }, { "epoch": 0.44, "learning_rate": 9.63831292009984e-08, "logits/chosen": -2.3570566177368164, "logits/rejected": -2.4314308166503906, "logps/chosen": -2.239889144897461, "logps/rejected": -39.278282165527344, "loss": 0.5659, "rewards/accuracies": 1.0, "rewards/chosen": 0.521850049495697, "rewards/margins": 0.6128504276275635, "rewards/rejected": -0.09100037068128586, "step": 246 }, { "epoch": 0.45, "learning_rate": 9.634658549737168e-08, "logits/chosen": -2.376845121383667, "logits/rejected": -2.383490562438965, "logps/chosen": -4.1121745109558105, "logps/rejected": -3.671985149383545, "loss": 0.712, "rewards/accuracies": 1.0, "rewards/chosen": 0.4731507897377014, "rewards/margins": 0.1288626790046692, "rewards/rejected": 0.3442881107330322, "step": 247 }, { "epoch": 0.45, "learning_rate": 9.630986510647397e-08, "logits/chosen": -2.3040690422058105, "logits/rejected": -2.3011128902435303, "logps/chosen": -3.769254207611084, "logps/rejected": -3.105839252471924, "loss": 0.672, "rewards/accuracies": 0.0, "rewards/chosen": 0.6338714361190796, "rewards/margins": -0.004640638828277588, "rewards/rejected": 0.6385120749473572, "step": 248 }, { "epoch": 0.45, "learning_rate": 9.62729681682945e-08, "logits/chosen": -2.2748236656188965, "logits/rejected": -2.265937328338623, "logps/chosen": -2.2033162117004395, "logps/rejected": -4.795928955078125, "loss": 0.6667, "rewards/accuracies": 1.0, "rewards/chosen": 0.7349599599838257, "rewards/margins": 0.2385777235031128, "rewards/rejected": 0.4963822364807129, "step": 249 }, { "epoch": 0.45, "learning_rate": 9.623589482349565e-08, "logits/chosen": -2.233302354812622, "logits/rejected": -2.297302722930908, "logps/chosen": -3.4363718032836914, "logps/rejected": -31.34482765197754, "loss": 0.4397, "rewards/accuracies": 1.0, "rewards/chosen": 0.7967520952224731, "rewards/margins": 0.8804211616516113, "rewards/rejected": -0.08366908878087997, "step": 250 }, { "epoch": 0.45, "learning_rate": 9.619864521341228e-08, "logits/chosen": -2.3489556312561035, "logits/rejected": -2.348475694656372, "logps/chosen": -4.074391841888428, "logps/rejected": -9.805932998657227, "loss": 0.5462, "rewards/accuracies": 1.0, "rewards/chosen": 0.9602167010307312, "rewards/margins": 0.5845311880111694, "rewards/rejected": 0.37568551301956177, "step": 251 }, { "epoch": 0.46, "learning_rate": 9.616121948005122e-08, "logits/chosen": -2.3697986602783203, "logits/rejected": -2.483060836791992, "logps/chosen": -3.2572731971740723, "logps/rejected": -33.83941650390625, "loss": 0.5302, "rewards/accuracies": 1.0, "rewards/chosen": 0.4859198033809662, "rewards/margins": 0.8126621842384338, "rewards/rejected": -0.32674238085746765, "step": 252 }, { "epoch": 0.46, "learning_rate": 9.612361776609075e-08, "logits/chosen": -2.3618268966674805, "logits/rejected": -2.3592751026153564, "logps/chosen": -1.4179353713989258, "logps/rejected": -10.387738227844238, "loss": 0.7184, "rewards/accuracies": 0.0, "rewards/chosen": 0.594865620136261, "rewards/margins": -0.15562409162521362, "rewards/rejected": 0.7504897117614746, "step": 253 }, { "epoch": 0.46, "learning_rate": 9.608584021488002e-08, "logits/chosen": -2.1915953159332275, "logits/rejected": -2.189821481704712, "logps/chosen": -5.35857629776001, "logps/rejected": -3.0600380897521973, "loss": 0.6329, "rewards/accuracies": 1.0, "rewards/chosen": 0.8100021481513977, "rewards/margins": 0.18201583623886108, "rewards/rejected": 0.6279863119125366, "step": 254 }, { "epoch": 0.46, "learning_rate": 9.604788697043854e-08, "logits/chosen": -2.3829128742218018, "logits/rejected": -2.38161301612854, "logps/chosen": -3.832341194152832, "logps/rejected": -2.184143304824829, "loss": 0.7093, "rewards/accuracies": 0.0, "rewards/chosen": 0.628440797328949, "rewards/margins": -0.03246593475341797, "rewards/rejected": 0.6609067320823669, "step": 255 }, { "epoch": 0.46, "learning_rate": 9.600975817745562e-08, "logits/chosen": -2.330493211746216, "logits/rejected": -2.330960273742676, "logps/chosen": -4.793565273284912, "logps/rejected": -4.807404518127441, "loss": 0.5648, "rewards/accuracies": 0.0, "rewards/chosen": 0.7221916913986206, "rewards/margins": -0.04289132356643677, "rewards/rejected": 0.7650830149650574, "step": 256 }, { "epoch": 0.46, "learning_rate": 9.59714539812898e-08, "logits/chosen": -2.3190553188323975, "logits/rejected": -2.3231847286224365, "logps/chosen": -2.4631083011627197, "logps/rejected": -3.4913339614868164, "loss": 0.7166, "rewards/accuracies": 0.0, "rewards/chosen": 0.5277588963508606, "rewards/margins": -0.06890618801116943, "rewards/rejected": 0.59666508436203, "step": 257 }, { "epoch": 0.47, "learning_rate": 9.593297452796832e-08, "logits/chosen": -2.4046549797058105, "logits/rejected": -2.409998655319214, "logps/chosen": -1.6997430324554443, "logps/rejected": -5.199511528015137, "loss": 0.5499, "rewards/accuracies": 0.0, "rewards/chosen": 0.5452906489372253, "rewards/margins": -0.16091644763946533, "rewards/rejected": 0.7062070965766907, "step": 258 }, { "epoch": 0.47, "learning_rate": 9.589431996418655e-08, "logits/chosen": -2.278995990753174, "logits/rejected": -2.280050039291382, "logps/chosen": -3.347907066345215, "logps/rejected": -9.834823608398438, "loss": 0.6295, "rewards/accuracies": 1.0, "rewards/chosen": 0.7481862306594849, "rewards/margins": 0.06841939687728882, "rewards/rejected": 0.679766833782196, "step": 259 }, { "epoch": 0.47, "learning_rate": 9.585549043730746e-08, "logits/chosen": -2.213982582092285, "logits/rejected": -2.2117812633514404, "logps/chosen": -2.2539024353027344, "logps/rejected": -3.9675607681274414, "loss": 0.7348, "rewards/accuracies": 1.0, "rewards/chosen": 0.44242578744888306, "rewards/margins": 0.03132149577140808, "rewards/rejected": 0.411104291677475, "step": 260 }, { "epoch": 0.47, "learning_rate": 9.581648609536097e-08, "logits/chosen": -2.3699731826782227, "logits/rejected": -2.372689962387085, "logps/chosen": -1.919543743133545, "logps/rejected": -2.416842222213745, "loss": 0.7751, "rewards/accuracies": 0.0, "rewards/chosen": 0.5282416343688965, "rewards/margins": -0.04384797811508179, "rewards/rejected": 0.5720896124839783, "step": 261 }, { "epoch": 0.47, "learning_rate": 9.577730708704353e-08, "logits/chosen": -2.4292538166046143, "logits/rejected": -2.45642352104187, "logps/chosen": 0.0, "logps/rejected": -3.144446611404419, "loss": 0.8885, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": -0.6707925200462341, "rewards/rejected": 0.6707925200462341, "step": 262 }, { "epoch": 0.48, "learning_rate": 9.573795356171738e-08, "logits/chosen": -2.318148612976074, "logits/rejected": -2.348675489425659, "logps/chosen": -3.711941719055176, "logps/rejected": -14.179472923278809, "loss": 0.6241, "rewards/accuracies": 0.0, "rewards/chosen": 0.44553670287132263, "rewards/margins": -0.24982169270515442, "rewards/rejected": 0.695358395576477, "step": 263 }, { "epoch": 0.48, "learning_rate": 9.569842566941016e-08, "logits/chosen": -2.3207859992980957, "logits/rejected": -2.326975107192993, "logps/chosen": -2.8391547203063965, "logps/rejected": -1.7661354541778564, "loss": 0.6307, "rewards/accuracies": 1.0, "rewards/chosen": 0.6911988854408264, "rewards/margins": 0.08401131629943848, "rewards/rejected": 0.6071875691413879, "step": 264 }, { "epoch": 0.48, "learning_rate": 9.565872356081419e-08, "logits/chosen": -2.322751522064209, "logits/rejected": -2.330683946609497, "logps/chosen": -3.247523069381714, "logps/rejected": -11.503047943115234, "loss": 0.5515, "rewards/accuracies": 1.0, "rewards/chosen": 0.5610513091087341, "rewards/margins": 0.7796289920806885, "rewards/rejected": -0.21857766807079315, "step": 265 }, { "epoch": 0.48, "learning_rate": 9.561884738728598e-08, "logits/chosen": -2.388125419616699, "logits/rejected": -2.38537335395813, "logps/chosen": -1.4100054502487183, "logps/rejected": -7.548962116241455, "loss": 0.6247, "rewards/accuracies": 1.0, "rewards/chosen": 0.6343473196029663, "rewards/margins": 0.17342492938041687, "rewards/rejected": 0.46092239022254944, "step": 266 }, { "epoch": 0.48, "learning_rate": 9.557879730084563e-08, "logits/chosen": -2.2647314071655273, "logits/rejected": -2.260348081588745, "logps/chosen": -5.261428356170654, "logps/rejected": -6.5563859939575195, "loss": 0.6369, "rewards/accuracies": 1.0, "rewards/chosen": 0.6773459911346436, "rewards/margins": 0.17491304874420166, "rewards/rejected": 0.5024329423904419, "step": 267 }, { "epoch": 0.48, "learning_rate": 9.553857345417624e-08, "logits/chosen": -2.384932518005371, "logits/rejected": -2.3906307220458984, "logps/chosen": -1.414265751838684, "logps/rejected": -3.6487317085266113, "loss": 0.7281, "rewards/accuracies": 0.0, "rewards/chosen": 0.4933811128139496, "rewards/margins": -0.13529619574546814, "rewards/rejected": 0.6286773085594177, "step": 268 }, { "epoch": 0.49, "learning_rate": 9.549817600062333e-08, "logits/chosen": -2.3760735988616943, "logits/rejected": -2.376476526260376, "logps/chosen": -3.878464698791504, "logps/rejected": -2.164762496948242, "loss": 0.6593, "rewards/accuracies": 1.0, "rewards/chosen": 0.4914284646511078, "rewards/margins": 0.00019171833992004395, "rewards/rejected": 0.49123674631118774, "step": 269 }, { "epoch": 0.49, "learning_rate": 9.545760509419428e-08, "logits/chosen": -2.2942888736724854, "logits/rejected": -2.297701835632324, "logps/chosen": -2.7852602005004883, "logps/rejected": -1.7898550033569336, "loss": 0.722, "rewards/accuracies": 0.0, "rewards/chosen": 0.4691639840602875, "rewards/margins": -0.03900626301765442, "rewards/rejected": 0.5081702470779419, "step": 270 }, { "epoch": 0.49, "learning_rate": 9.54168608895577e-08, "logits/chosen": -2.42463755607605, "logits/rejected": -2.421370506286621, "logps/chosen": -1.8172876834869385, "logps/rejected": -2.42478084564209, "loss": 0.5869, "rewards/accuracies": 0.0, "rewards/chosen": 0.38567179441452026, "rewards/margins": -0.1516786813735962, "rewards/rejected": 0.5373504757881165, "step": 271 }, { "epoch": 0.49, "learning_rate": 9.537594354204293e-08, "logits/chosen": -2.2894654273986816, "logits/rejected": -2.2682039737701416, "logps/chosen": -19.310150146484375, "logps/rejected": -3.1624484062194824, "loss": 0.7801, "rewards/accuracies": 0.0, "rewards/chosen": 0.4598381221294403, "rewards/margins": -0.1442265808582306, "rewards/rejected": 0.6040647029876709, "step": 272 }, { "epoch": 0.49, "learning_rate": 9.533485320763927e-08, "logits/chosen": -2.430495262145996, "logits/rejected": -2.4338817596435547, "logps/chosen": -2.8368301391601562, "logps/rejected": -13.518211364746094, "loss": 0.7051, "rewards/accuracies": 0.0, "rewards/chosen": 0.7698001861572266, "rewards/margins": -0.08209514617919922, "rewards/rejected": 0.8518953323364258, "step": 273 }, { "epoch": 0.5, "learning_rate": 9.529359004299562e-08, "logits/chosen": -2.525515079498291, "logits/rejected": -2.539742946624756, "logps/chosen": -1.8434194326400757, "logps/rejected": -16.04983139038086, "loss": 0.7069, "rewards/accuracies": 1.0, "rewards/chosen": 0.5818200707435608, "rewards/margins": 0.07275384664535522, "rewards/rejected": 0.5090662240982056, "step": 274 }, { "epoch": 0.5, "learning_rate": 9.525215420541968e-08, "logits/chosen": -2.3558905124664307, "logits/rejected": -2.3531723022460938, "logps/chosen": -2.2073235511779785, "logps/rejected": -0.7905623912811279, "loss": 0.8078, "rewards/accuracies": 1.0, "rewards/chosen": 0.5761736035346985, "rewards/margins": 0.06108677387237549, "rewards/rejected": 0.515086829662323, "step": 275 }, { "epoch": 0.5, "learning_rate": 9.521054585287744e-08, "logits/chosen": -2.198732614517212, "logits/rejected": -2.198951482772827, "logps/chosen": -2.0719990730285645, "logps/rejected": -5.99657678604126, "loss": 0.7375, "rewards/accuracies": 1.0, "rewards/chosen": 0.540519654750824, "rewards/margins": 0.187707781791687, "rewards/rejected": 0.35281187295913696, "step": 276 }, { "epoch": 0.5, "learning_rate": 9.516876514399264e-08, "logits/chosen": -2.3252906799316406, "logits/rejected": -2.3241758346557617, "logps/chosen": -2.250999689102173, "logps/rejected": -2.1948394775390625, "loss": 0.7133, "rewards/accuracies": 0.0, "rewards/chosen": 0.5765529870986938, "rewards/margins": -0.10231977701187134, "rewards/rejected": 0.6788727641105652, "step": 277 }, { "epoch": 0.5, "learning_rate": 9.512681223804598e-08, "logits/chosen": -2.3127293586730957, "logits/rejected": -2.318131446838379, "logps/chosen": -2.466280221939087, "logps/rejected": -2.9073166847229004, "loss": 0.5866, "rewards/accuracies": 1.0, "rewards/chosen": 0.543389618396759, "rewards/margins": 0.0972575843334198, "rewards/rejected": 0.44613203406333923, "step": 278 }, { "epoch": 0.5, "learning_rate": 9.508468729497475e-08, "logits/chosen": -2.281351327896118, "logits/rejected": -2.3471834659576416, "logps/chosen": -2.793694019317627, "logps/rejected": -39.29352569580078, "loss": 0.6537, "rewards/accuracies": 1.0, "rewards/chosen": 0.6049371361732483, "rewards/margins": 0.547199010848999, "rewards/rejected": 0.05773811414837837, "step": 279 }, { "epoch": 0.51, "learning_rate": 9.504239047537197e-08, "logits/chosen": -2.36399507522583, "logits/rejected": -2.3642807006835938, "logps/chosen": -1.8722658157348633, "logps/rejected": -12.582772254943848, "loss": 0.7013, "rewards/accuracies": 0.0, "rewards/chosen": 0.5899094343185425, "rewards/margins": -0.22340303659439087, "rewards/rejected": 0.8133124709129333, "step": 280 }, { "epoch": 0.51, "learning_rate": 9.499992194048602e-08, "logits/chosen": -2.4255712032318115, "logits/rejected": -2.4291601181030273, "logps/chosen": -4.307476997375488, "logps/rejected": -10.547819137573242, "loss": 0.7522, "rewards/accuracies": 0.0, "rewards/chosen": 0.44612130522727966, "rewards/margins": -0.29551324248313904, "rewards/rejected": 0.7416345477104187, "step": 281 }, { "epoch": 0.51, "learning_rate": 9.495728185221985e-08, "logits/chosen": -2.3681342601776123, "logits/rejected": -2.3677470684051514, "logps/chosen": -1.6748195886611938, "logps/rejected": -1.7284667491912842, "loss": 0.5633, "rewards/accuracies": 0.0, "rewards/chosen": 0.3999640643596649, "rewards/margins": -0.010440438985824585, "rewards/rejected": 0.4104045033454895, "step": 282 }, { "epoch": 0.51, "learning_rate": 9.491447037313045e-08, "logits/chosen": -2.359374761581421, "logits/rejected": -2.3928029537200928, "logps/chosen": -7.319305896759033, "logps/rejected": -9.17973518371582, "loss": 0.6238, "rewards/accuracies": 0.0, "rewards/chosen": 0.6651219725608826, "rewards/margins": -0.019002199172973633, "rewards/rejected": 0.6841241717338562, "step": 283 }, { "epoch": 0.51, "learning_rate": 9.487148766642817e-08, "logits/chosen": -2.3286831378936768, "logits/rejected": -2.334005832672119, "logps/chosen": -6.266202926635742, "logps/rejected": -3.3832974433898926, "loss": 0.6839, "rewards/accuracies": 1.0, "rewards/chosen": 0.764245331287384, "rewards/margins": 0.04144418239593506, "rewards/rejected": 0.722801148891449, "step": 284 }, { "epoch": 0.52, "learning_rate": 9.482833389597615e-08, "logits/chosen": -2.3389317989349365, "logits/rejected": -2.3410425186157227, "logps/chosen": -1.772551417350769, "logps/rejected": -2.775063991546631, "loss": 0.7026, "rewards/accuracies": 0.0, "rewards/chosen": 0.6717392802238464, "rewards/margins": -0.0103224515914917, "rewards/rejected": 0.6820617318153381, "step": 285 }, { "epoch": 0.52, "learning_rate": 9.478500922628971e-08, "logits/chosen": -2.291442632675171, "logits/rejected": -2.2917935848236084, "logps/chosen": -2.3292720317840576, "logps/rejected": -2.3406786918640137, "loss": 0.6019, "rewards/accuracies": 0.0, "rewards/chosen": 0.5136780738830566, "rewards/margins": -0.0924222469329834, "rewards/rejected": 0.60610032081604, "step": 286 }, { "epoch": 0.52, "learning_rate": 9.474151382253564e-08, "logits/chosen": -2.3235185146331787, "logits/rejected": -2.357576370239258, "logps/chosen": -2.321692943572998, "logps/rejected": -31.602596282958984, "loss": 0.5116, "rewards/accuracies": 1.0, "rewards/chosen": 0.6596267223358154, "rewards/margins": 0.05732303857803345, "rewards/rejected": 0.602303683757782, "step": 287 }, { "epoch": 0.52, "learning_rate": 9.469784785053162e-08, "logits/chosen": -2.358380079269409, "logits/rejected": -2.4254066944122314, "logps/chosen": -5.0216875076293945, "logps/rejected": -27.266246795654297, "loss": 0.645, "rewards/accuracies": 1.0, "rewards/chosen": 0.7875203490257263, "rewards/margins": 0.09431242942810059, "rewards/rejected": 0.6932079195976257, "step": 288 }, { "epoch": 0.52, "learning_rate": 9.465401147674563e-08, "logits/chosen": -2.360382080078125, "logits/rejected": -2.35780668258667, "logps/chosen": -1.1358790397644043, "logps/rejected": -4.417881011962891, "loss": 0.5484, "rewards/accuracies": 0.0, "rewards/chosen": 0.4869241416454315, "rewards/margins": -0.04721835255622864, "rewards/rejected": 0.5341424942016602, "step": 289 }, { "epoch": 0.52, "learning_rate": 9.461000486829527e-08, "logits/chosen": -2.370779514312744, "logits/rejected": -2.348820447921753, "logps/chosen": -1.6909173727035522, "logps/rejected": 0.0, "loss": 0.5683, "rewards/accuracies": 1.0, "rewards/chosen": 0.5756546854972839, "rewards/margins": 0.5756546854972839, "rewards/rejected": 0.0, "step": 290 }, { "epoch": 0.53, "learning_rate": 9.456582819294708e-08, "logits/chosen": -2.2728939056396484, "logits/rejected": -2.271587610244751, "logps/chosen": -9.936118125915527, "logps/rejected": -3.1241819858551025, "loss": 0.7524, "rewards/accuracies": 0.0, "rewards/chosen": 0.3613360524177551, "rewards/margins": -0.08932885527610779, "rewards/rejected": 0.4506649076938629, "step": 291 }, { "epoch": 0.53, "learning_rate": 9.452148161911596e-08, "logits/chosen": -2.378807306289673, "logits/rejected": -2.3528871536254883, "logps/chosen": -1.1499301195144653, "logps/rejected": 0.0, "loss": 0.45, "rewards/accuracies": 1.0, "rewards/chosen": 0.4424055516719818, "rewards/margins": 0.4424055516719818, "rewards/rejected": 0.0, "step": 292 }, { "epoch": 0.53, "learning_rate": 9.447696531586457e-08, "logits/chosen": -2.426741600036621, "logits/rejected": -2.418740749359131, "logps/chosen": -9.488700866699219, "logps/rejected": -2.334355115890503, "loss": 0.5022, "rewards/accuracies": 1.0, "rewards/chosen": 0.6936081051826477, "rewards/margins": 0.09128445386886597, "rewards/rejected": 0.6023236513137817, "step": 293 }, { "epoch": 0.53, "learning_rate": 9.443227945290255e-08, "logits/chosen": -2.1841161251068115, "logits/rejected": -2.1855363845825195, "logps/chosen": -4.83660888671875, "logps/rejected": -10.294363975524902, "loss": 0.6375, "rewards/accuracies": 0.0, "rewards/chosen": 0.6758131384849548, "rewards/margins": -0.07213234901428223, "rewards/rejected": 0.7479454874992371, "step": 294 }, { "epoch": 0.53, "learning_rate": 9.438742420058602e-08, "logits/chosen": -2.2951128482818604, "logits/rejected": -2.2854197025299072, "logps/chosen": -4.73508882522583, "logps/rejected": -6.557504653930664, "loss": 0.7231, "rewards/accuracies": 0.0, "rewards/chosen": 0.5339500904083252, "rewards/margins": -0.3513581156730652, "rewards/rejected": 0.8853082060813904, "step": 295 }, { "epoch": 0.54, "learning_rate": 9.434239972991682e-08, "logits/chosen": -2.2630748748779297, "logits/rejected": -2.421210527420044, "logps/chosen": -2.45831298828125, "logps/rejected": -35.79515838623047, "loss": 0.4763, "rewards/accuracies": 1.0, "rewards/chosen": 0.6109153032302856, "rewards/margins": 0.8046302199363708, "rewards/rejected": -0.193714901804924, "step": 296 }, { "epoch": 0.54, "learning_rate": 9.429720621254192e-08, "logits/chosen": -2.3237788677215576, "logits/rejected": -2.323580503463745, "logps/chosen": -2.4324910640716553, "logps/rejected": -0.9215617179870605, "loss": 0.5578, "rewards/accuracies": 1.0, "rewards/chosen": 0.4412618577480316, "rewards/margins": 0.06227850914001465, "rewards/rejected": 0.37898334860801697, "step": 297 }, { "epoch": 0.54, "learning_rate": 9.425184382075275e-08, "logits/chosen": -2.357114553451538, "logits/rejected": -2.3723714351654053, "logps/chosen": -4.537993907928467, "logps/rejected": -11.808740615844727, "loss": 0.7276, "rewards/accuracies": 0.0, "rewards/chosen": 0.500869631767273, "rewards/margins": -0.0819053053855896, "rewards/rejected": 0.5827749371528625, "step": 298 }, { "epoch": 0.54, "learning_rate": 9.420631272748454e-08, "logits/chosen": -2.223703622817993, "logits/rejected": -2.207719087600708, "logps/chosen": -9.031664848327637, "logps/rejected": -3.058020830154419, "loss": 0.6738, "rewards/accuracies": 1.0, "rewards/chosen": 0.6679931879043579, "rewards/margins": 0.016087234020233154, "rewards/rejected": 0.6519059538841248, "step": 299 }, { "epoch": 0.54, "learning_rate": 9.416061310631565e-08, "logits/chosen": -2.3346316814422607, "logits/rejected": -2.339890241622925, "logps/chosen": -2.6804208755493164, "logps/rejected": -3.57045841217041, "loss": 0.8142, "rewards/accuracies": 1.0, "rewards/chosen": 0.47967228293418884, "rewards/margins": 0.041707128286361694, "rewards/rejected": 0.43796515464782715, "step": 300 }, { "epoch": 0.54, "learning_rate": 9.41147451314669e-08, "logits/chosen": -2.3724944591522217, "logits/rejected": -2.3700528144836426, "logps/chosen": -2.4904255867004395, "logps/rejected": -1.7938047647476196, "loss": 0.6988, "rewards/accuracies": 1.0, "rewards/chosen": 0.6615045070648193, "rewards/margins": 0.18905583024024963, "rewards/rejected": 0.4724486768245697, "step": 301 }, { "epoch": 0.55, "learning_rate": 9.4068708977801e-08, "logits/chosen": -2.1951117515563965, "logits/rejected": -2.2741611003875732, "logps/chosen": -2.6486716270446777, "logps/rejected": -33.51774978637695, "loss": 0.5845, "rewards/accuracies": 1.0, "rewards/chosen": 0.5200797319412231, "rewards/margins": 0.3184272050857544, "rewards/rejected": 0.20165252685546875, "step": 302 }, { "epoch": 0.55, "learning_rate": 9.402250482082173e-08, "logits/chosen": -2.319333076477051, "logits/rejected": -2.3192219734191895, "logps/chosen": -4.820881366729736, "logps/rejected": -11.59329891204834, "loss": 0.6911, "rewards/accuracies": 0.0, "rewards/chosen": 0.520753800868988, "rewards/margins": -0.12597960233688354, "rewards/rejected": 0.6467334032058716, "step": 303 }, { "epoch": 0.55, "learning_rate": 9.39761328366734e-08, "logits/chosen": -2.3532521724700928, "logits/rejected": -2.4145476818084717, "logps/chosen": -2.060835123062134, "logps/rejected": -24.44768714904785, "loss": 0.4359, "rewards/accuracies": 1.0, "rewards/chosen": 0.5547723770141602, "rewards/margins": 0.5358695983886719, "rewards/rejected": 0.01890277862548828, "step": 304 }, { "epoch": 0.55, "learning_rate": 9.392959320214009e-08, "logits/chosen": -2.3111395835876465, "logits/rejected": -2.3061604499816895, "logps/chosen": -1.405099630355835, "logps/rejected": -5.1378865242004395, "loss": 0.6969, "rewards/accuracies": 0.0, "rewards/chosen": 0.5900663733482361, "rewards/margins": -0.02918875217437744, "rewards/rejected": 0.6192551255226135, "step": 305 }, { "epoch": 0.55, "learning_rate": 9.388288609464503e-08, "logits/chosen": -2.326878786087036, "logits/rejected": -2.328749656677246, "logps/chosen": -3.320894241333008, "logps/rejected": -2.840799570083618, "loss": 0.6334, "rewards/accuracies": 0.0, "rewards/chosen": 0.5752615332603455, "rewards/margins": -0.11534786224365234, "rewards/rejected": 0.6906093955039978, "step": 306 }, { "epoch": 0.56, "learning_rate": 9.383601169224993e-08, "logits/chosen": -2.357947826385498, "logits/rejected": -2.3799593448638916, "logps/chosen": -9.844342231750488, "logps/rejected": -8.272444725036621, "loss": 0.8637, "rewards/accuracies": 0.0, "rewards/chosen": 0.5189711451530457, "rewards/margins": -0.12937575578689575, "rewards/rejected": 0.6483469009399414, "step": 307 }, { "epoch": 0.56, "learning_rate": 9.378897017365424e-08, "logits/chosen": -2.227837085723877, "logits/rejected": -2.2236201763153076, "logps/chosen": -3.187262535095215, "logps/rejected": -4.425328254699707, "loss": 0.6266, "rewards/accuracies": 1.0, "rewards/chosen": 0.9137681126594543, "rewards/margins": 0.17564678192138672, "rewards/rejected": 0.7381213307380676, "step": 308 }, { "epoch": 0.56, "learning_rate": 9.374176171819455e-08, "logits/chosen": -2.348379373550415, "logits/rejected": -2.3223185539245605, "logps/chosen": -2.2164623737335205, "logps/rejected": 0.0, "loss": 0.6708, "rewards/accuracies": 1.0, "rewards/chosen": 0.804578423500061, "rewards/margins": 0.804578423500061, "rewards/rejected": 0.0, "step": 309 }, { "epoch": 0.56, "learning_rate": 9.369438650584382e-08, "logits/chosen": -2.2423951625823975, "logits/rejected": -2.2142255306243896, "logps/chosen": -2.1118860244750977, "logps/rejected": 0.0, "loss": 0.6776, "rewards/accuracies": 1.0, "rewards/chosen": 0.5469163656234741, "rewards/margins": 0.5469163656234741, "rewards/rejected": 0.0, "step": 310 }, { "epoch": 0.56, "learning_rate": 9.364684471721076e-08, "logits/chosen": -2.175448179244995, "logits/rejected": -2.1790261268615723, "logps/chosen": -2.617164373397827, "logps/rejected": -8.18201732635498, "loss": 0.8642, "rewards/accuracies": 0.0, "rewards/chosen": 0.7616419792175293, "rewards/margins": -0.20337814092636108, "rewards/rejected": 0.9650201201438904, "step": 311 }, { "epoch": 0.56, "learning_rate": 9.359913653353913e-08, "logits/chosen": -2.281376838684082, "logits/rejected": -2.291513204574585, "logps/chosen": -2.4218735694885254, "logps/rejected": -1.853491187095642, "loss": 0.7052, "rewards/accuracies": 1.0, "rewards/chosen": 0.6107679009437561, "rewards/margins": 0.15070411562919617, "rewards/rejected": 0.46006378531455994, "step": 312 }, { "epoch": 0.57, "learning_rate": 9.355126213670703e-08, "logits/chosen": -2.3488361835479736, "logits/rejected": -2.351581573486328, "logps/chosen": -4.821601390838623, "logps/rejected": -2.8344340324401855, "loss": 0.7601, "rewards/accuracies": 1.0, "rewards/chosen": 0.562360942363739, "rewards/margins": 0.031202614307403564, "rewards/rejected": 0.5311583280563354, "step": 313 }, { "epoch": 0.57, "learning_rate": 9.350322170922619e-08, "logits/chosen": -2.284182071685791, "logits/rejected": -2.287053108215332, "logps/chosen": -3.0544302463531494, "logps/rejected": -7.334153652191162, "loss": 0.5283, "rewards/accuracies": 1.0, "rewards/chosen": 0.5977603197097778, "rewards/margins": 0.09918773174285889, "rewards/rejected": 0.49857258796691895, "step": 314 }, { "epoch": 0.57, "learning_rate": 9.345501543424134e-08, "logits/chosen": -2.229400396347046, "logits/rejected": -2.2214527130126953, "logps/chosen": -4.065117359161377, "logps/rejected": -11.43116569519043, "loss": 0.8705, "rewards/accuracies": 1.0, "rewards/chosen": 0.7461277842521667, "rewards/margins": 0.24062508344650269, "rewards/rejected": 0.5055027008056641, "step": 315 }, { "epoch": 0.57, "learning_rate": 9.340664349552944e-08, "logits/chosen": -2.294739007949829, "logits/rejected": -2.459890365600586, "logps/chosen": -3.3137221336364746, "logps/rejected": -51.82297897338867, "loss": 0.4075, "rewards/accuracies": 1.0, "rewards/chosen": 0.6262884736061096, "rewards/margins": 0.43697839975357056, "rewards/rejected": 0.18931007385253906, "step": 316 }, { "epoch": 0.57, "learning_rate": 9.335810607749905e-08, "logits/chosen": -2.2724227905273438, "logits/rejected": -2.2826638221740723, "logps/chosen": -1.949375867843628, "logps/rejected": -2.440560817718506, "loss": 0.6591, "rewards/accuracies": 1.0, "rewards/chosen": 0.6305365562438965, "rewards/margins": 0.08958965539932251, "rewards/rejected": 0.540946900844574, "step": 317 }, { "epoch": 0.58, "learning_rate": 9.330940336518956e-08, "logits/chosen": -2.3131814002990723, "logits/rejected": -2.3115627765655518, "logps/chosen": -1.1398251056671143, "logps/rejected": -4.0198073387146, "loss": 0.5383, "rewards/accuracies": 1.0, "rewards/chosen": 0.3618224859237671, "rewards/margins": 0.017364472150802612, "rewards/rejected": 0.3444580137729645, "step": 318 }, { "epoch": 0.58, "learning_rate": 9.326053554427045e-08, "logits/chosen": -2.29965877532959, "logits/rejected": -2.302220344543457, "logps/chosen": -7.299316883087158, "logps/rejected": -7.495055198669434, "loss": 0.684, "rewards/accuracies": 1.0, "rewards/chosen": 0.9073526263237, "rewards/margins": 0.03709590435028076, "rewards/rejected": 0.8702567219734192, "step": 319 }, { "epoch": 0.58, "learning_rate": 9.321150280104077e-08, "logits/chosen": -2.2905566692352295, "logits/rejected": -2.291099786758423, "logps/chosen": -2.58135724067688, "logps/rejected": -3.7716403007507324, "loss": 0.6049, "rewards/accuracies": 1.0, "rewards/chosen": 0.6356997489929199, "rewards/margins": 0.08255201578140259, "rewards/rejected": 0.5531477332115173, "step": 320 }, { "epoch": 0.58, "learning_rate": 9.316230532242823e-08, "logits/chosen": -2.2170755863189697, "logits/rejected": -2.2238659858703613, "logps/chosen": -2.6900277137756348, "logps/rejected": -1.5625793933868408, "loss": 0.5137, "rewards/accuracies": 1.0, "rewards/chosen": 0.7816378474235535, "rewards/margins": 0.02629566192626953, "rewards/rejected": 0.7553421854972839, "step": 321 }, { "epoch": 0.58, "learning_rate": 9.311294329598858e-08, "logits/chosen": -2.13840913772583, "logits/rejected": -2.1446352005004883, "logps/chosen": -2.6215949058532715, "logps/rejected": -2.462059497833252, "loss": 0.7165, "rewards/accuracies": 0.0, "rewards/chosen": 0.42187371850013733, "rewards/margins": -0.08237114548683167, "rewards/rejected": 0.504244863986969, "step": 322 }, { "epoch": 0.58, "learning_rate": 9.306341690990482e-08, "logits/chosen": -2.2081103324890137, "logits/rejected": -2.2462992668151855, "logps/chosen": -2.356576442718506, "logps/rejected": -26.83817481994629, "loss": 0.5303, "rewards/accuracies": 1.0, "rewards/chosen": 0.531154215335846, "rewards/margins": 0.4287719428539276, "rewards/rejected": 0.10238227993249893, "step": 323 }, { "epoch": 0.59, "learning_rate": 9.301372635298663e-08, "logits/chosen": -2.2196407318115234, "logits/rejected": -2.2382774353027344, "logps/chosen": -9.149744987487793, "logps/rejected": -11.218825340270996, "loss": 0.7684, "rewards/accuracies": 0.0, "rewards/chosen": 0.2558375298976898, "rewards/margins": -0.22818318009376526, "rewards/rejected": 0.4840207099914551, "step": 324 }, { "epoch": 0.59, "learning_rate": 9.296387181466952e-08, "logits/chosen": -2.28796648979187, "logits/rejected": -2.2989211082458496, "logps/chosen": -3.5947844982147217, "logps/rejected": -2.4993343353271484, "loss": 0.5331, "rewards/accuracies": 1.0, "rewards/chosen": 0.6818515658378601, "rewards/margins": 0.04717731475830078, "rewards/rejected": 0.6346742510795593, "step": 325 }, { "epoch": 0.59, "learning_rate": 9.291385348501413e-08, "logits/chosen": -2.3061280250549316, "logits/rejected": -2.3246424198150635, "logps/chosen": -9.85309886932373, "logps/rejected": -25.725929260253906, "loss": 0.594, "rewards/accuracies": 1.0, "rewards/chosen": 0.63428795337677, "rewards/margins": 0.5015113353729248, "rewards/rejected": 0.1327766478061676, "step": 326 }, { "epoch": 0.59, "learning_rate": 9.286367155470552e-08, "logits/chosen": -2.2346713542938232, "logits/rejected": -2.239778995513916, "logps/chosen": -2.4756953716278076, "logps/rejected": -10.40604019165039, "loss": 0.5759, "rewards/accuracies": 0.0, "rewards/chosen": 0.5411669015884399, "rewards/margins": -0.2202463150024414, "rewards/rejected": 0.7614132165908813, "step": 327 }, { "epoch": 0.59, "learning_rate": 9.281332621505248e-08, "logits/chosen": -2.312060594558716, "logits/rejected": -2.3225698471069336, "logps/chosen": -6.160004615783691, "logps/rejected": -4.111545562744141, "loss": 0.5421, "rewards/accuracies": 0.0, "rewards/chosen": 0.5768723487854004, "rewards/margins": -0.10455316305160522, "rewards/rejected": 0.6814255118370056, "step": 328 }, { "epoch": 0.59, "learning_rate": 9.276281765798675e-08, "logits/chosen": -2.3644444942474365, "logits/rejected": -2.3599913120269775, "logps/chosen": -5.3442792892456055, "logps/rejected": -18.64691162109375, "loss": 0.5311, "rewards/accuracies": 1.0, "rewards/chosen": 0.4386536777019501, "rewards/margins": 0.41516736149787903, "rewards/rejected": 0.02348632924258709, "step": 329 }, { "epoch": 0.6, "learning_rate": 9.27121460760623e-08, "logits/chosen": -2.283151865005493, "logits/rejected": -2.275709629058838, "logps/chosen": -2.0110316276550293, "logps/rejected": -6.1589531898498535, "loss": 0.6576, "rewards/accuracies": 1.0, "rewards/chosen": 0.59953373670578, "rewards/margins": 0.21125978231430054, "rewards/rejected": 0.3882739543914795, "step": 330 }, { "epoch": 0.6, "learning_rate": 9.266131166245458e-08, "logits/chosen": -2.3799333572387695, "logits/rejected": -2.3699746131896973, "logps/chosen": -2.4182491302490234, "logps/rejected": -7.407041549682617, "loss": 0.615, "rewards/accuracies": 1.0, "rewards/chosen": 0.558106541633606, "rewards/margins": 0.2163083255290985, "rewards/rejected": 0.34179821610450745, "step": 331 }, { "epoch": 0.6, "learning_rate": 9.261031461095987e-08, "logits/chosen": -2.3841664791107178, "logits/rejected": -2.3809752464294434, "logps/chosen": -2.176074981689453, "logps/rejected": -1.9813101291656494, "loss": 0.6708, "rewards/accuracies": 1.0, "rewards/chosen": 0.60582035779953, "rewards/margins": 0.04416394233703613, "rewards/rejected": 0.5616564154624939, "step": 332 }, { "epoch": 0.6, "learning_rate": 9.25591551159944e-08, "logits/chosen": -2.389469623565674, "logits/rejected": -2.4152793884277344, "logps/chosen": 0.0, "logps/rejected": -3.0348894596099854, "loss": 0.8333, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": -0.8308393359184265, "rewards/rejected": 0.8308393359184265, "step": 333 }, { "epoch": 0.6, "learning_rate": 9.25078333725937e-08, "logits/chosen": -2.288475751876831, "logits/rejected": -2.2854645252227783, "logps/chosen": -6.119935989379883, "logps/rejected": -2.7215659618377686, "loss": 0.6676, "rewards/accuracies": 1.0, "rewards/chosen": 0.7988829016685486, "rewards/margins": 0.2539122700691223, "rewards/rejected": 0.5449706315994263, "step": 334 }, { "epoch": 0.61, "learning_rate": 9.24563495764119e-08, "logits/chosen": -2.4089407920837402, "logits/rejected": -2.3716378211975098, "logps/chosen": -23.23965835571289, "logps/rejected": -2.508514881134033, "loss": 0.6128, "rewards/accuracies": 0.0, "rewards/chosen": 0.32536888122558594, "rewards/margins": -0.2841595411300659, "rewards/rejected": 0.6095284223556519, "step": 335 }, { "epoch": 0.61, "learning_rate": 9.240470392372084e-08, "logits/chosen": -2.166728973388672, "logits/rejected": -2.167111396789551, "logps/chosen": -4.137216091156006, "logps/rejected": -1.7957171201705933, "loss": 0.6232, "rewards/accuracies": 1.0, "rewards/chosen": 0.6047542095184326, "rewards/margins": 0.057252705097198486, "rewards/rejected": 0.5475015044212341, "step": 336 }, { "epoch": 0.61, "learning_rate": 9.235289661140946e-08, "logits/chosen": -2.1724934577941895, "logits/rejected": -2.237316131591797, "logps/chosen": -2.985236644744873, "logps/rejected": -25.275291442871094, "loss": 0.6539, "rewards/accuracies": 1.0, "rewards/chosen": 0.48015400767326355, "rewards/margins": 0.15833470225334167, "rewards/rejected": 0.3218193054199219, "step": 337 }, { "epoch": 0.61, "learning_rate": 9.230092783698298e-08, "logits/chosen": -2.263807535171509, "logits/rejected": -2.264707565307617, "logps/chosen": -2.587961196899414, "logps/rejected": -2.5045833587646484, "loss": 0.7024, "rewards/accuracies": 1.0, "rewards/chosen": 0.7422122955322266, "rewards/margins": 0.2259722352027893, "rewards/rejected": 0.5162400603294373, "step": 338 }, { "epoch": 0.61, "learning_rate": 9.224879779856217e-08, "logits/chosen": -2.3309454917907715, "logits/rejected": -2.3487050533294678, "logps/chosen": -5.190051555633545, "logps/rejected": -9.475951194763184, "loss": 0.6468, "rewards/accuracies": 1.0, "rewards/chosen": 0.5696481466293335, "rewards/margins": 0.19580581784248352, "rewards/rejected": 0.37384232878685, "step": 339 }, { "epoch": 0.61, "learning_rate": 9.219650669488259e-08, "logits/chosen": -2.3227646350860596, "logits/rejected": -2.324828863143921, "logps/chosen": -9.154789924621582, "logps/rejected": -5.609498977661133, "loss": 0.553, "rewards/accuracies": 1.0, "rewards/chosen": 0.6903061866760254, "rewards/margins": 0.37442997097969055, "rewards/rejected": 0.31587621569633484, "step": 340 }, { "epoch": 0.62, "learning_rate": 9.214405472529379e-08, "logits/chosen": -2.2714195251464844, "logits/rejected": -2.2767417430877686, "logps/chosen": -3.2854483127593994, "logps/rejected": -4.795475006103516, "loss": 0.6616, "rewards/accuracies": 1.0, "rewards/chosen": 0.728191077709198, "rewards/margins": 0.22440457344055176, "rewards/rejected": 0.5037865042686462, "step": 341 }, { "epoch": 0.62, "learning_rate": 9.209144208975864e-08, "logits/chosen": -2.3593051433563232, "logits/rejected": -2.33642315864563, "logps/chosen": -3.0334110260009766, "logps/rejected": 0.0, "loss": 0.7116, "rewards/accuracies": 1.0, "rewards/chosen": 0.8433395624160767, "rewards/margins": 0.8433395624160767, "rewards/rejected": 0.0, "step": 342 }, { "epoch": 0.62, "learning_rate": 9.20386689888525e-08, "logits/chosen": -2.3410110473632812, "logits/rejected": -2.343627691268921, "logps/chosen": -3.0166678428649902, "logps/rejected": -2.587754011154175, "loss": 0.7571, "rewards/accuracies": 0.0, "rewards/chosen": 0.6913970708847046, "rewards/margins": -0.0823250412940979, "rewards/rejected": 0.7737221121788025, "step": 343 }, { "epoch": 0.62, "learning_rate": 9.198573562376247e-08, "logits/chosen": -2.3167643547058105, "logits/rejected": -2.320056676864624, "logps/chosen": -4.613986492156982, "logps/rejected": -2.7946436405181885, "loss": 0.6674, "rewards/accuracies": 0.0, "rewards/chosen": 0.4921901226043701, "rewards/margins": -0.12472188472747803, "rewards/rejected": 0.6169120073318481, "step": 344 }, { "epoch": 0.62, "learning_rate": 9.193264219628663e-08, "logits/chosen": -2.4092307090759277, "logits/rejected": -2.5508439540863037, "logps/chosen": -4.48306131362915, "logps/rejected": -35.90182876586914, "loss": 0.4123, "rewards/accuracies": 1.0, "rewards/chosen": 0.5113856792449951, "rewards/margins": 0.6893302202224731, "rewards/rejected": -0.17794457077980042, "step": 345 }, { "epoch": 0.63, "learning_rate": 9.187938890883327e-08, "logits/chosen": -2.331329822540283, "logits/rejected": -2.3275985717773438, "logps/chosen": -5.847905158996582, "logps/rejected": -2.0188400745391846, "loss": 0.4805, "rewards/accuracies": 1.0, "rewards/chosen": 0.7449723482131958, "rewards/margins": 0.26426127552986145, "rewards/rejected": 0.48071107268333435, "step": 346 }, { "epoch": 0.63, "learning_rate": 9.182597596442007e-08, "logits/chosen": -2.3448681831359863, "logits/rejected": -2.348618984222412, "logps/chosen": -2.0131418704986572, "logps/rejected": -1.354601263999939, "loss": 0.6438, "rewards/accuracies": 1.0, "rewards/chosen": 0.6139282584190369, "rewards/margins": 0.12686336040496826, "rewards/rejected": 0.4870648980140686, "step": 347 }, { "epoch": 0.63, "learning_rate": 9.177240356667346e-08, "logits/chosen": -2.400465250015259, "logits/rejected": -2.4761016368865967, "logps/chosen": -2.8290646076202393, "logps/rejected": -31.878591537475586, "loss": 0.3927, "rewards/accuracies": 1.0, "rewards/chosen": 0.7233295440673828, "rewards/margins": 0.48520106077194214, "rewards/rejected": 0.23812846839427948, "step": 348 }, { "epoch": 0.63, "learning_rate": 9.171867191982767e-08, "logits/chosen": -2.3497374057769775, "logits/rejected": -2.3227341175079346, "logps/chosen": -1.3133076429367065, "logps/rejected": 0.0, "loss": 0.5496, "rewards/accuracies": 1.0, "rewards/chosen": 0.66140216588974, "rewards/margins": 0.66140216588974, "rewards/rejected": 0.0, "step": 349 }, { "epoch": 0.63, "learning_rate": 9.166478122872408e-08, "logits/chosen": -2.352022171020508, "logits/rejected": -2.3225815296173096, "logps/chosen": -2.6945879459381104, "logps/rejected": 0.0, "loss": 0.5451, "rewards/accuracies": 1.0, "rewards/chosen": 0.7188169360160828, "rewards/margins": 0.7188169360160828, "rewards/rejected": 0.0, "step": 350 }, { "epoch": 0.63, "learning_rate": 9.161073169881038e-08, "logits/chosen": -2.5224151611328125, "logits/rejected": -2.5200514793395996, "logps/chosen": -9.643415451049805, "logps/rejected": -11.227517127990723, "loss": 0.7078, "rewards/accuracies": 1.0, "rewards/chosen": -0.008851051330566406, "rewards/margins": 0.030498504638671875, "rewards/rejected": -0.03934955596923828, "step": 351 }, { "epoch": 0.64, "learning_rate": 9.155652353613981e-08, "logits/chosen": -2.317885398864746, "logits/rejected": -2.318803071975708, "logps/chosen": -17.599252700805664, "logps/rejected": -6.685110092163086, "loss": 0.6702, "rewards/accuracies": 0.0, "rewards/chosen": 0.529034435749054, "rewards/margins": -0.17869961261749268, "rewards/rejected": 0.7077340483665466, "step": 352 }, { "epoch": 0.64, "learning_rate": 9.150215694737038e-08, "logits/chosen": -2.378970146179199, "logits/rejected": -2.3719260692596436, "logps/chosen": -4.853835582733154, "logps/rejected": -1.8879729509353638, "loss": 0.6837, "rewards/accuracies": 1.0, "rewards/chosen": 0.6504444479942322, "rewards/margins": 0.1204269528388977, "rewards/rejected": 0.5300174951553345, "step": 353 }, { "epoch": 0.64, "learning_rate": 9.144763213976401e-08, "logits/chosen": -2.2143661975860596, "logits/rejected": -2.2407195568084717, "logps/chosen": -2.583603858947754, "logps/rejected": -7.718847274780273, "loss": 0.599, "rewards/accuracies": 1.0, "rewards/chosen": 0.6568811535835266, "rewards/margins": 0.15538328886032104, "rewards/rejected": 0.5014978647232056, "step": 354 }, { "epoch": 0.64, "learning_rate": 9.139294932118588e-08, "logits/chosen": -2.37888765335083, "logits/rejected": -2.3804264068603516, "logps/chosen": -3.1523070335388184, "logps/rejected": -7.3775482177734375, "loss": 0.7259, "rewards/accuracies": 0.0, "rewards/chosen": 0.6722378134727478, "rewards/margins": -0.08793944120407104, "rewards/rejected": 0.7601772546768188, "step": 355 }, { "epoch": 0.64, "learning_rate": 9.13381087001035e-08, "logits/chosen": -2.3152620792388916, "logits/rejected": -2.314208745956421, "logps/chosen": -15.252741813659668, "logps/rejected": -3.6122875213623047, "loss": 0.6001, "rewards/accuracies": 1.0, "rewards/chosen": 0.7588933110237122, "rewards/margins": 0.14813071489334106, "rewards/rejected": 0.6107625961303711, "step": 356 }, { "epoch": 0.65, "learning_rate": 9.128311048558598e-08, "logits/chosen": -2.2979366779327393, "logits/rejected": -2.4134745597839355, "logps/chosen": -2.870546579360962, "logps/rejected": -51.962310791015625, "loss": 0.5652, "rewards/accuracies": 1.0, "rewards/chosen": 0.5271693468093872, "rewards/margins": 0.39675360918045044, "rewards/rejected": 0.13041572272777557, "step": 357 }, { "epoch": 0.65, "learning_rate": 9.122795488730324e-08, "logits/chosen": -2.3352251052856445, "logits/rejected": -2.3431777954101562, "logps/chosen": -3.0318992137908936, "logps/rejected": -1.9281437397003174, "loss": 0.4986, "rewards/accuracies": 1.0, "rewards/chosen": 0.6822253465652466, "rewards/margins": 0.1773180365562439, "rewards/rejected": 0.5049073100090027, "step": 358 }, { "epoch": 0.65, "learning_rate": 9.117264211552517e-08, "logits/chosen": -2.3325448036193848, "logits/rejected": -2.3399832248687744, "logps/chosen": -6.807514667510986, "logps/rejected": -12.048301696777344, "loss": 0.8229, "rewards/accuracies": 1.0, "rewards/chosen": 0.6123217940330505, "rewards/margins": 0.3123232126235962, "rewards/rejected": 0.29999858140945435, "step": 359 }, { "epoch": 0.65, "learning_rate": 9.111717238112088e-08, "logits/chosen": -2.303180694580078, "logits/rejected": -2.298560857772827, "logps/chosen": -2.2715554237365723, "logps/rejected": -2.7441844940185547, "loss": 0.6596, "rewards/accuracies": 1.0, "rewards/chosen": 0.8399181365966797, "rewards/margins": 0.26343458890914917, "rewards/rejected": 0.5764835476875305, "step": 360 }, { "epoch": 0.65, "learning_rate": 9.106154589555788e-08, "logits/chosen": -2.3694169521331787, "logits/rejected": -2.3705451488494873, "logps/chosen": -1.4321409463882446, "logps/rejected": -4.145229339599609, "loss": 0.6348, "rewards/accuracies": 1.0, "rewards/chosen": 0.6462551951408386, "rewards/margins": 0.10329747200012207, "rewards/rejected": 0.5429577231407166, "step": 361 }, { "epoch": 0.65, "learning_rate": 9.100576287090121e-08, "logits/chosen": -2.3477821350097656, "logits/rejected": -2.3675239086151123, "logps/chosen": -2.536196231842041, "logps/rejected": -24.351703643798828, "loss": 0.5795, "rewards/accuracies": 1.0, "rewards/chosen": 0.572725236415863, "rewards/margins": 0.2511613070964813, "rewards/rejected": 0.3215639293193817, "step": 362 }, { "epoch": 0.66, "learning_rate": 9.094982351981271e-08, "logits/chosen": -2.4216017723083496, "logits/rejected": -2.4168710708618164, "logps/chosen": -1.7033889293670654, "logps/rejected": -19.698780059814453, "loss": 0.5734, "rewards/accuracies": 1.0, "rewards/chosen": 0.6390191912651062, "rewards/margins": 0.5099782943725586, "rewards/rejected": 0.1290409117937088, "step": 363 }, { "epoch": 0.66, "learning_rate": 9.089372805555022e-08, "logits/chosen": -2.3910303115844727, "logits/rejected": -2.380282402038574, "logps/chosen": -3.3801846504211426, "logps/rejected": -2.7808279991149902, "loss": 0.7028, "rewards/accuracies": 0.0, "rewards/chosen": 0.815136730670929, "rewards/margins": -0.11551648378372192, "rewards/rejected": 0.9306532144546509, "step": 364 }, { "epoch": 0.66, "learning_rate": 9.083747669196667e-08, "logits/chosen": -2.2979471683502197, "logits/rejected": -2.3804283142089844, "logps/chosen": -2.4946279525756836, "logps/rejected": -28.075254440307617, "loss": 0.5721, "rewards/accuracies": 1.0, "rewards/chosen": 0.6411418318748474, "rewards/margins": 0.3193768858909607, "rewards/rejected": 0.3217649459838867, "step": 365 }, { "epoch": 0.66, "learning_rate": 9.078106964350938e-08, "logits/chosen": -2.333056688308716, "logits/rejected": -2.3416900634765625, "logps/chosen": -2.1625754833221436, "logps/rejected": -8.759231567382812, "loss": 0.6876, "rewards/accuracies": 1.0, "rewards/chosen": 0.5347411036491394, "rewards/margins": 0.10419425368309021, "rewards/rejected": 0.4305468499660492, "step": 366 }, { "epoch": 0.66, "learning_rate": 9.072450712521913e-08, "logits/chosen": -2.263340711593628, "logits/rejected": -2.247802257537842, "logps/chosen": -5.029279708862305, "logps/rejected": 0.0, "loss": 0.6021, "rewards/accuracies": 1.0, "rewards/chosen": 0.5990249514579773, "rewards/margins": 0.5990249514579773, "rewards/rejected": 0.0, "step": 367 }, { "epoch": 0.67, "learning_rate": 9.066778935272947e-08, "logits/chosen": -2.349672555923462, "logits/rejected": -2.3276443481445312, "logps/chosen": -0.8738144636154175, "logps/rejected": 0.0, "loss": 0.5551, "rewards/accuracies": 1.0, "rewards/chosen": 0.5241590142250061, "rewards/margins": 0.5241590142250061, "rewards/rejected": 0.0, "step": 368 }, { "epoch": 0.67, "learning_rate": 9.061091654226578e-08, "logits/chosen": -2.2205777168273926, "logits/rejected": -2.2336316108703613, "logps/chosen": -4.377147674560547, "logps/rejected": -16.16172218322754, "loss": 0.494, "rewards/accuracies": 1.0, "rewards/chosen": 0.5743271112442017, "rewards/margins": 0.3231258690357208, "rewards/rejected": 0.25120124220848083, "step": 369 }, { "epoch": 0.67, "learning_rate": 9.055388891064448e-08, "logits/chosen": -2.276132583618164, "logits/rejected": -2.27970552444458, "logps/chosen": -7.286733627319336, "logps/rejected": -1.1257778406143188, "loss": 0.7347, "rewards/accuracies": 0.0, "rewards/chosen": 0.27261456847190857, "rewards/margins": -0.1787462830543518, "rewards/rejected": 0.4513608515262604, "step": 370 }, { "epoch": 0.67, "learning_rate": 9.049670667527225e-08, "logits/chosen": -2.2862870693206787, "logits/rejected": -2.2795140743255615, "logps/chosen": -2.5285274982452393, "logps/rejected": -4.126417636871338, "loss": 0.5857, "rewards/accuracies": 0.0, "rewards/chosen": 0.6757171750068665, "rewards/margins": -0.053269922733306885, "rewards/rejected": 0.7289870977401733, "step": 371 }, { "epoch": 0.67, "learning_rate": 9.043937005414515e-08, "logits/chosen": -2.323073148727417, "logits/rejected": -2.4713454246520996, "logps/chosen": -2.9711520671844482, "logps/rejected": -29.082494735717773, "loss": 0.6023, "rewards/accuracies": 1.0, "rewards/chosen": 0.5197710990905762, "rewards/margins": 0.6110726594924927, "rewards/rejected": -0.09130153805017471, "step": 372 }, { "epoch": 0.67, "learning_rate": 9.038187926584781e-08, "logits/chosen": -2.3975672721862793, "logits/rejected": -2.391876220703125, "logps/chosen": -2.6668219566345215, "logps/rejected": -2.8492541313171387, "loss": 0.624, "rewards/accuracies": 1.0, "rewards/chosen": 0.7119171023368835, "rewards/margins": 0.06814998388290405, "rewards/rejected": 0.6437671184539795, "step": 373 }, { "epoch": 0.68, "learning_rate": 9.032423452955258e-08, "logits/chosen": -2.3386969566345215, "logits/rejected": -2.344388008117676, "logps/chosen": -2.4478845596313477, "logps/rejected": -2.983734130859375, "loss": 0.5836, "rewards/accuracies": 1.0, "rewards/chosen": 0.8920173048973083, "rewards/margins": 0.24751514196395874, "rewards/rejected": 0.6445021629333496, "step": 374 }, { "epoch": 0.68, "learning_rate": 9.026643606501874e-08, "logits/chosen": -2.3494889736175537, "logits/rejected": -2.3496108055114746, "logps/chosen": -6.987615585327148, "logps/rejected": -6.77156400680542, "loss": 0.7292, "rewards/accuracies": 1.0, "rewards/chosen": 0.4825381338596344, "rewards/margins": 0.13045135140419006, "rewards/rejected": 0.35208678245544434, "step": 375 }, { "epoch": 0.68, "learning_rate": 9.020848409259156e-08, "logits/chosen": -2.2447688579559326, "logits/rejected": -2.2521135807037354, "logps/chosen": -6.178703784942627, "logps/rejected": -2.672461748123169, "loss": 0.7472, "rewards/accuracies": 0.0, "rewards/chosen": 0.4955800473690033, "rewards/margins": -0.015433460474014282, "rewards/rejected": 0.5110135078430176, "step": 376 }, { "epoch": 0.68, "learning_rate": 9.015037883320162e-08, "logits/chosen": -2.301642656326294, "logits/rejected": -2.3420591354370117, "logps/chosen": -3.6878366470336914, "logps/rejected": -33.433128356933594, "loss": 0.6129, "rewards/accuracies": 1.0, "rewards/chosen": 0.7086203694343567, "rewards/margins": 0.5097468495368958, "rewards/rejected": 0.19887351989746094, "step": 377 }, { "epoch": 0.68, "learning_rate": 9.00921205083638e-08, "logits/chosen": -2.280240774154663, "logits/rejected": -2.282367706298828, "logps/chosen": -2.347146987915039, "logps/rejected": -4.15733528137207, "loss": 0.5733, "rewards/accuracies": 1.0, "rewards/chosen": 0.6183546185493469, "rewards/margins": 0.30211886763572693, "rewards/rejected": 0.31623575091362, "step": 378 }, { "epoch": 0.69, "learning_rate": 9.003370934017656e-08, "logits/chosen": -2.1745543479919434, "logits/rejected": -2.1789357662200928, "logps/chosen": -2.7869086265563965, "logps/rejected": -2.142763614654541, "loss": 0.503, "rewards/accuracies": 1.0, "rewards/chosen": 0.6916442513465881, "rewards/margins": 0.1206132173538208, "rewards/rejected": 0.5710310339927673, "step": 379 }, { "epoch": 0.69, "learning_rate": 8.997514555132102e-08, "logits/chosen": -2.3444535732269287, "logits/rejected": -2.3434011936187744, "logps/chosen": -5.28920316696167, "logps/rejected": -2.7882652282714844, "loss": 0.8166, "rewards/accuracies": 0.0, "rewards/chosen": 0.06353364139795303, "rewards/margins": -0.6131791472434998, "rewards/rejected": 0.6767128109931946, "step": 380 }, { "epoch": 0.69, "learning_rate": 8.991642936506013e-08, "logits/chosen": -2.2882161140441895, "logits/rejected": -2.2877321243286133, "logps/chosen": -3.6156764030456543, "logps/rejected": -2.3701820373535156, "loss": 0.7344, "rewards/accuracies": 1.0, "rewards/chosen": 0.6351770162582397, "rewards/margins": 0.05557382106781006, "rewards/rejected": 0.5796031951904297, "step": 381 }, { "epoch": 0.69, "learning_rate": 8.985756100523786e-08, "logits/chosen": -2.2713117599487305, "logits/rejected": -2.2610905170440674, "logps/chosen": -2.589977502822876, "logps/rejected": -7.396418571472168, "loss": 0.7509, "rewards/accuracies": 0.0, "rewards/chosen": 0.5417168736457825, "rewards/margins": -0.20155107975006104, "rewards/rejected": 0.7432679533958435, "step": 382 }, { "epoch": 0.69, "learning_rate": 8.979854069627828e-08, "logits/chosen": -2.3475217819213867, "logits/rejected": -2.3514325618743896, "logps/chosen": -2.7415480613708496, "logps/rejected": -3.238969087600708, "loss": 0.6179, "rewards/accuracies": 1.0, "rewards/chosen": 0.6954014301300049, "rewards/margins": 0.1835557222366333, "rewards/rejected": 0.5118457078933716, "step": 383 }, { "epoch": 0.69, "learning_rate": 8.973936866318477e-08, "logits/chosen": -2.2453973293304443, "logits/rejected": -2.2491214275360107, "logps/chosen": -3.0966248512268066, "logps/rejected": -4.689205646514893, "loss": 0.6215, "rewards/accuracies": 0.0, "rewards/chosen": 0.7692902088165283, "rewards/margins": -0.07488268613815308, "rewards/rejected": 0.8441728949546814, "step": 384 }, { "epoch": 0.7, "learning_rate": 8.968004513153907e-08, "logits/chosen": -2.246095657348633, "logits/rejected": -2.246965169906616, "logps/chosen": -2.95371150970459, "logps/rejected": -11.410226821899414, "loss": 0.7171, "rewards/accuracies": 0.0, "rewards/chosen": 0.6807675361633301, "rewards/margins": -0.11908310651779175, "rewards/rejected": 0.7998506426811218, "step": 385 }, { "epoch": 0.7, "learning_rate": 8.962057032750052e-08, "logits/chosen": -2.4596502780914307, "logits/rejected": -2.4650142192840576, "logps/chosen": -2.4163661003112793, "logps/rejected": -1.2975037097930908, "loss": 0.6481, "rewards/accuracies": 0.0, "rewards/chosen": 0.6673097014427185, "rewards/margins": -0.010853826999664307, "rewards/rejected": 0.6781635284423828, "step": 386 }, { "epoch": 0.7, "learning_rate": 8.956094447780517e-08, "logits/chosen": -2.2519097328186035, "logits/rejected": -2.3351857662200928, "logps/chosen": -2.3429641723632812, "logps/rejected": -24.898391723632812, "loss": 0.6496, "rewards/accuracies": 1.0, "rewards/chosen": 0.5261126160621643, "rewards/margins": 0.1935480535030365, "rewards/rejected": 0.3325645625591278, "step": 387 }, { "epoch": 0.7, "learning_rate": 8.950116780976486e-08, "logits/chosen": -2.3897602558135986, "logits/rejected": -2.3903751373291016, "logps/chosen": -3.0919673442840576, "logps/rejected": -1.9718372821807861, "loss": 0.6949, "rewards/accuracies": 0.0, "rewards/chosen": 0.7590625882148743, "rewards/margins": -0.007134914398193359, "rewards/rejected": 0.7661975026130676, "step": 388 }, { "epoch": 0.7, "learning_rate": 8.944124055126646e-08, "logits/chosen": -2.4567575454711914, "logits/rejected": -2.4570515155792236, "logps/chosen": -4.699686527252197, "logps/rejected": -12.0401611328125, "loss": 0.6773, "rewards/accuracies": 0.0, "rewards/chosen": 0.7563974261283875, "rewards/margins": -0.05020546913146973, "rewards/rejected": 0.8066028952598572, "step": 389 }, { "epoch": 0.71, "learning_rate": 8.938116293077084e-08, "logits/chosen": -2.4365928173065186, "logits/rejected": -2.472017288208008, "logps/chosen": -2.422236204147339, "logps/rejected": -20.876270294189453, "loss": 0.9395, "rewards/accuracies": 1.0, "rewards/chosen": 0.6184229850769043, "rewards/margins": 0.041199564933776855, "rewards/rejected": 0.5772234201431274, "step": 390 }, { "epoch": 0.71, "learning_rate": 8.932093517731218e-08, "logits/chosen": -2.3721261024475098, "logits/rejected": -2.3698835372924805, "logps/chosen": -3.2558000087738037, "logps/rejected": -2.8906049728393555, "loss": 0.6087, "rewards/accuracies": 1.0, "rewards/chosen": 0.5852876901626587, "rewards/margins": 0.14239904284477234, "rewards/rejected": 0.44288864731788635, "step": 391 }, { "epoch": 0.71, "learning_rate": 8.926055752049697e-08, "logits/chosen": -2.2247064113616943, "logits/rejected": -2.2421834468841553, "logps/chosen": -2.8880763053894043, "logps/rejected": -12.340458869934082, "loss": 0.6972, "rewards/accuracies": 0.0, "rewards/chosen": 0.6217995285987854, "rewards/margins": -0.10635286569595337, "rewards/rejected": 0.7281523942947388, "step": 392 }, { "epoch": 0.71, "learning_rate": 8.920003019050321e-08, "logits/chosen": -2.2183103561401367, "logits/rejected": -2.216355562210083, "logps/chosen": -3.6793570518493652, "logps/rejected": -1.4264476299285889, "loss": 0.7297, "rewards/accuracies": 0.0, "rewards/chosen": 0.6256065964698792, "rewards/margins": -0.03447544574737549, "rewards/rejected": 0.6600820422172546, "step": 393 }, { "epoch": 0.71, "learning_rate": 8.913935341807946e-08, "logits/chosen": -2.3591268062591553, "logits/rejected": -2.3617196083068848, "logps/chosen": -2.7071995735168457, "logps/rejected": -4.499296188354492, "loss": 0.7284, "rewards/accuracies": 1.0, "rewards/chosen": 0.8126323819160461, "rewards/margins": 0.04147738218307495, "rewards/rejected": 0.7711549997329712, "step": 394 }, { "epoch": 0.71, "learning_rate": 8.907852743454403e-08, "logits/chosen": -2.282562494277954, "logits/rejected": -2.2814135551452637, "logps/chosen": -5.2419114112854, "logps/rejected": -10.767685890197754, "loss": 0.7884, "rewards/accuracies": 0.0, "rewards/chosen": 0.877192497253418, "rewards/margins": -0.11280298233032227, "rewards/rejected": 0.9899954795837402, "step": 395 }, { "epoch": 0.72, "learning_rate": 8.901755247178404e-08, "logits/chosen": -2.2952725887298584, "logits/rejected": -2.2794344425201416, "logps/chosen": -5.596856117248535, "logps/rejected": -8.24475383758545, "loss": 0.4735, "rewards/accuracies": 1.0, "rewards/chosen": 0.8656021952629089, "rewards/margins": 0.32895809412002563, "rewards/rejected": 0.5366441011428833, "step": 396 }, { "epoch": 0.72, "learning_rate": 8.89564287622546e-08, "logits/chosen": -2.334233283996582, "logits/rejected": -2.334130048751831, "logps/chosen": -3.7605226039886475, "logps/rejected": -13.890082359313965, "loss": 0.7948, "rewards/accuracies": 0.0, "rewards/chosen": 0.7401514053344727, "rewards/margins": -0.036847591400146484, "rewards/rejected": 0.7769989967346191, "step": 397 }, { "epoch": 0.72, "learning_rate": 8.889515653897786e-08, "logits/chosen": -2.3830409049987793, "logits/rejected": -2.379868984222412, "logps/chosen": -5.502197265625, "logps/rejected": -2.8162362575531006, "loss": 0.635, "rewards/accuracies": 1.0, "rewards/chosen": 0.7246183753013611, "rewards/margins": 0.15579938888549805, "rewards/rejected": 0.568818986415863, "step": 398 }, { "epoch": 0.72, "learning_rate": 8.883373603554217e-08, "logits/chosen": -2.2512223720550537, "logits/rejected": -2.255786895751953, "logps/chosen": -1.4111555814743042, "logps/rejected": -5.707075595855713, "loss": 0.6001, "rewards/accuracies": 1.0, "rewards/chosen": 0.6104331016540527, "rewards/margins": 0.15251079201698303, "rewards/rejected": 0.4579223096370697, "step": 399 }, { "epoch": 0.72, "learning_rate": 8.877216748610116e-08, "logits/chosen": -2.311012029647827, "logits/rejected": -2.3148159980773926, "logps/chosen": -1.5377225875854492, "logps/rejected": -1.2005091905593872, "loss": 0.6223, "rewards/accuracies": 0.0, "rewards/chosen": 0.7251356244087219, "rewards/margins": -0.11450672149658203, "rewards/rejected": 0.839642345905304, "step": 400 }, { "epoch": 0.73, "learning_rate": 8.871045112537283e-08, "logits/chosen": -2.485295534133911, "logits/rejected": -2.485123872756958, "logps/chosen": -1.6114225387573242, "logps/rejected": -3.4570837020874023, "loss": 0.663, "rewards/accuracies": 1.0, "rewards/chosen": 0.8047816157341003, "rewards/margins": 0.09979301691055298, "rewards/rejected": 0.7049885988235474, "step": 401 }, { "epoch": 0.73, "learning_rate": 8.864858718863872e-08, "logits/chosen": -2.3321986198425293, "logits/rejected": -2.2938621044158936, "logps/chosen": -11.206086158752441, "logps/rejected": -1.4999384880065918, "loss": 0.6471, "rewards/accuracies": 1.0, "rewards/chosen": 0.6184651255607605, "rewards/margins": 0.05380403995513916, "rewards/rejected": 0.5646610856056213, "step": 402 }, { "epoch": 0.73, "learning_rate": 8.858657591174296e-08, "logits/chosen": -2.263334274291992, "logits/rejected": -2.263225555419922, "logps/chosen": -4.578157424926758, "logps/rejected": -3.7657172679901123, "loss": 0.6348, "rewards/accuracies": 1.0, "rewards/chosen": 0.6992048621177673, "rewards/margins": 0.24869531393051147, "rewards/rejected": 0.45050954818725586, "step": 403 }, { "epoch": 0.73, "learning_rate": 8.852441753109139e-08, "logits/chosen": -2.3172481060028076, "logits/rejected": -2.3211827278137207, "logps/chosen": -1.8430989980697632, "logps/rejected": -0.9596489667892456, "loss": 0.7921, "rewards/accuracies": 1.0, "rewards/chosen": 0.4777393341064453, "rewards/margins": 0.010152965784072876, "rewards/rejected": 0.46758636832237244, "step": 404 }, { "epoch": 0.73, "learning_rate": 8.846211228365066e-08, "logits/chosen": -2.340785026550293, "logits/rejected": -2.3411574363708496, "logps/chosen": -2.0157084465026855, "logps/rejected": -14.731325149536133, "loss": 0.6187, "rewards/accuracies": 1.0, "rewards/chosen": 0.890241265296936, "rewards/margins": 0.45984652638435364, "rewards/rejected": 0.4303947389125824, "step": 405 }, { "epoch": 0.73, "learning_rate": 8.839966040694729e-08, "logits/chosen": -2.1537976264953613, "logits/rejected": -2.1600048542022705, "logps/chosen": -1.2610485553741455, "logps/rejected": -2.766007423400879, "loss": 0.6896, "rewards/accuracies": 1.0, "rewards/chosen": 0.6260948181152344, "rewards/margins": 0.08419233560562134, "rewards/rejected": 0.541902482509613, "step": 406 }, { "epoch": 0.74, "learning_rate": 8.833706213906681e-08, "logits/chosen": -2.2818870544433594, "logits/rejected": -2.3082382678985596, "logps/chosen": -1.332961082458496, "logps/rejected": -27.644906997680664, "loss": 0.6446, "rewards/accuracies": 1.0, "rewards/chosen": 0.6012241244316101, "rewards/margins": 0.2047700583934784, "rewards/rejected": 0.3964540660381317, "step": 407 }, { "epoch": 0.74, "learning_rate": 8.827431771865287e-08, "logits/chosen": -2.404832124710083, "logits/rejected": -2.3975472450256348, "logps/chosen": -1.598494052886963, "logps/rejected": -7.239432334899902, "loss": 0.7013, "rewards/accuracies": 0.0, "rewards/chosen": 0.631912112236023, "rewards/margins": -0.0755276083946228, "rewards/rejected": 0.7074397206306458, "step": 408 }, { "epoch": 0.74, "learning_rate": 8.821142738490626e-08, "logits/chosen": -2.305513381958008, "logits/rejected": -2.275273084640503, "logps/chosen": -8.397685050964355, "logps/rejected": 0.0, "loss": 0.5848, "rewards/accuracies": 1.0, "rewards/chosen": 0.7450969815254211, "rewards/margins": 0.7450969815254211, "rewards/rejected": 0.0, "step": 409 }, { "epoch": 0.74, "learning_rate": 8.814839137758404e-08, "logits/chosen": -2.283658027648926, "logits/rejected": -2.2843148708343506, "logps/chosen": -1.9501357078552246, "logps/rejected": -10.247278213500977, "loss": 0.7287, "rewards/accuracies": 0.0, "rewards/chosen": 0.4959023594856262, "rewards/margins": -0.3000330924987793, "rewards/rejected": 0.7959354519844055, "step": 410 }, { "epoch": 0.74, "learning_rate": 8.808520993699863e-08, "logits/chosen": -2.4019973278045654, "logits/rejected": -2.4060170650482178, "logps/chosen": -1.7489173412322998, "logps/rejected": -2.0127577781677246, "loss": 0.8231, "rewards/accuracies": 1.0, "rewards/chosen": 0.5774537324905396, "rewards/margins": 0.09237867593765259, "rewards/rejected": 0.48507505655288696, "step": 411 }, { "epoch": 0.75, "learning_rate": 8.802188330401692e-08, "logits/chosen": -2.297262191772461, "logits/rejected": -2.362036943435669, "logps/chosen": -2.028564929962158, "logps/rejected": -23.77202606201172, "loss": 0.6234, "rewards/accuracies": 1.0, "rewards/chosen": 0.5304082632064819, "rewards/margins": 0.42460113763809204, "rewards/rejected": 0.1058071181178093, "step": 412 }, { "epoch": 0.75, "learning_rate": 8.795841172005925e-08, "logits/chosen": -2.2043232917785645, "logits/rejected": -2.2067697048187256, "logps/chosen": -2.5737714767456055, "logps/rejected": -2.243137836456299, "loss": 0.6247, "rewards/accuracies": 0.0, "rewards/chosen": 0.4670071601867676, "rewards/margins": -0.05091595649719238, "rewards/rejected": 0.51792311668396, "step": 413 }, { "epoch": 0.75, "learning_rate": 8.78947954270986e-08, "logits/chosen": -2.3179407119750977, "logits/rejected": -2.3258183002471924, "logps/chosen": -4.745360374450684, "logps/rejected": -2.2690274715423584, "loss": 0.7449, "rewards/accuracies": 1.0, "rewards/chosen": 0.647014319896698, "rewards/margins": 0.16898047924041748, "rewards/rejected": 0.4780338406562805, "step": 414 }, { "epoch": 0.75, "learning_rate": 8.783103466765959e-08, "logits/chosen": -2.2506229877471924, "logits/rejected": -2.2413597106933594, "logps/chosen": -2.855069160461426, "logps/rejected": -4.238304138183594, "loss": 0.6888, "rewards/accuracies": 1.0, "rewards/chosen": 0.7536749243736267, "rewards/margins": 0.09805941581726074, "rewards/rejected": 0.655615508556366, "step": 415 }, { "epoch": 0.75, "learning_rate": 8.776712968481765e-08, "logits/chosen": -2.3033576011657715, "logits/rejected": -2.297400712966919, "logps/chosen": -0.646006166934967, "logps/rejected": -2.1614952087402344, "loss": 0.7002, "rewards/accuracies": 0.0, "rewards/chosen": 0.668932318687439, "rewards/margins": -0.06676393747329712, "rewards/rejected": 0.7356962561607361, "step": 416 }, { "epoch": 0.75, "learning_rate": 8.770308072219799e-08, "logits/chosen": -2.3785974979400635, "logits/rejected": -2.3699052333831787, "logps/chosen": -8.516467094421387, "logps/rejected": -1.963841438293457, "loss": 0.6411, "rewards/accuracies": 1.0, "rewards/chosen": 0.8988510966300964, "rewards/margins": 0.2612333297729492, "rewards/rejected": 0.6376177668571472, "step": 417 }, { "epoch": 0.76, "learning_rate": 8.76388880239747e-08, "logits/chosen": -2.2859885692596436, "logits/rejected": -2.300584554672241, "logps/chosen": -4.227797031402588, "logps/rejected": -20.046592712402344, "loss": 0.4709, "rewards/accuracies": 1.0, "rewards/chosen": 0.677473247051239, "rewards/margins": 0.35347574949264526, "rewards/rejected": 0.32399749755859375, "step": 418 }, { "epoch": 0.76, "learning_rate": 8.757455183486989e-08, "logits/chosen": -2.2006964683532715, "logits/rejected": -2.199767827987671, "logps/chosen": -1.4981615543365479, "logps/rejected": -4.195127010345459, "loss": 0.7119, "rewards/accuracies": 1.0, "rewards/chosen": 0.5312055945396423, "rewards/margins": 0.11602848768234253, "rewards/rejected": 0.4151771068572998, "step": 419 }, { "epoch": 0.76, "learning_rate": 8.751007240015266e-08, "logits/chosen": -2.263664960861206, "logits/rejected": -2.260317325592041, "logps/chosen": -1.1456589698791504, "logps/rejected": -6.926059722900391, "loss": 0.6427, "rewards/accuracies": 1.0, "rewards/chosen": 0.6292659044265747, "rewards/margins": 0.2274167239665985, "rewards/rejected": 0.4018491804599762, "step": 420 }, { "epoch": 0.76, "learning_rate": 8.74454499656382e-08, "logits/chosen": -2.278529644012451, "logits/rejected": -2.273359775543213, "logps/chosen": -2.4825570583343506, "logps/rejected": -2.1996917724609375, "loss": 0.7866, "rewards/accuracies": 0.0, "rewards/chosen": 0.5015746355056763, "rewards/margins": -0.09304338693618774, "rewards/rejected": 0.594618022441864, "step": 421 }, { "epoch": 0.76, "learning_rate": 8.738068477768687e-08, "logits/chosen": -2.414182662963867, "logits/rejected": -2.388758659362793, "logps/chosen": -2.1262764930725098, "logps/rejected": 0.0, "loss": 0.6032, "rewards/accuracies": 1.0, "rewards/chosen": 0.5287694334983826, "rewards/margins": 0.5287694334983826, "rewards/rejected": 0.0, "step": 422 }, { "epoch": 0.76, "learning_rate": 8.731577708320328e-08, "logits/chosen": -2.3308181762695312, "logits/rejected": -2.3323709964752197, "logps/chosen": -4.515313625335693, "logps/rejected": -2.613213300704956, "loss": 0.5331, "rewards/accuracies": 1.0, "rewards/chosen": 0.9428254961967468, "rewards/margins": 0.20931005477905273, "rewards/rejected": 0.7335154414176941, "step": 423 }, { "epoch": 0.77, "learning_rate": 8.725072712963526e-08, "logits/chosen": -2.322589635848999, "logits/rejected": -2.2986481189727783, "logps/chosen": -2.614450216293335, "logps/rejected": 0.0, "loss": 0.4267, "rewards/accuracies": 1.0, "rewards/chosen": 0.5273237228393555, "rewards/margins": 0.5273237228393555, "rewards/rejected": 0.0, "step": 424 }, { "epoch": 0.77, "learning_rate": 8.718553516497303e-08, "logits/chosen": -2.4351184368133545, "logits/rejected": -2.430845022201538, "logps/chosen": -2.27872371673584, "logps/rejected": -6.187473297119141, "loss": 0.6513, "rewards/accuracies": 1.0, "rewards/chosen": 0.905195415019989, "rewards/margins": 0.1750289797782898, "rewards/rejected": 0.7301664352416992, "step": 425 }, { "epoch": 0.77, "learning_rate": 8.712020143774814e-08, "logits/chosen": -2.3435111045837402, "logits/rejected": -2.334700584411621, "logps/chosen": -4.233992576599121, "logps/rejected": -7.224752426147461, "loss": 0.7378, "rewards/accuracies": 0.0, "rewards/chosen": 0.6948464512825012, "rewards/margins": -0.27464091777801514, "rewards/rejected": 0.9694873690605164, "step": 426 }, { "epoch": 0.77, "learning_rate": 8.705472619703265e-08, "logits/chosen": -2.3329999446868896, "logits/rejected": -2.3405721187591553, "logps/chosen": -3.527653455734253, "logps/rejected": -3.11191463470459, "loss": 0.5811, "rewards/accuracies": 1.0, "rewards/chosen": 0.7556697130203247, "rewards/margins": 0.31217944622039795, "rewards/rejected": 0.44349026679992676, "step": 427 }, { "epoch": 0.77, "learning_rate": 8.698910969243806e-08, "logits/chosen": -2.2496328353881836, "logits/rejected": -2.2571451663970947, "logps/chosen": -1.2928112745285034, "logps/rejected": -4.048357009887695, "loss": 0.6063, "rewards/accuracies": 0.0, "rewards/chosen": 0.5497388243675232, "rewards/margins": -0.17036664485931396, "rewards/rejected": 0.7201054692268372, "step": 428 }, { "epoch": 0.78, "learning_rate": 8.692335217411446e-08, "logits/chosen": -2.31014347076416, "logits/rejected": -2.3123466968536377, "logps/chosen": -6.427866458892822, "logps/rejected": -1.007929801940918, "loss": 0.6559, "rewards/accuracies": 1.0, "rewards/chosen": 0.6871909499168396, "rewards/margins": 0.09626340866088867, "rewards/rejected": 0.5909275412559509, "step": 429 }, { "epoch": 0.78, "learning_rate": 8.685745389274946e-08, "logits/chosen": -2.3890435695648193, "logits/rejected": -2.382927417755127, "logps/chosen": -17.112503051757812, "logps/rejected": -6.839970111846924, "loss": 0.5691, "rewards/accuracies": 1.0, "rewards/chosen": 0.5886390805244446, "rewards/margins": 0.21190515160560608, "rewards/rejected": 0.3767339289188385, "step": 430 }, { "epoch": 0.78, "learning_rate": 8.679141509956736e-08, "logits/chosen": -2.2998268604278564, "logits/rejected": -2.301865816116333, "logps/chosen": -1.9457651376724243, "logps/rejected": -2.7027440071105957, "loss": 0.8781, "rewards/accuracies": 1.0, "rewards/chosen": 0.8033811450004578, "rewards/margins": 0.24026036262512207, "rewards/rejected": 0.5631207823753357, "step": 431 }, { "epoch": 0.78, "learning_rate": 8.672523604632808e-08, "logits/chosen": -2.2700374126434326, "logits/rejected": -2.303926467895508, "logps/chosen": -2.640104055404663, "logps/rejected": -27.191619873046875, "loss": 0.5655, "rewards/accuracies": 1.0, "rewards/chosen": 0.5267742276191711, "rewards/margins": 0.20527973771095276, "rewards/rejected": 0.3214944899082184, "step": 432 }, { "epoch": 0.78, "learning_rate": 8.665891698532633e-08, "logits/chosen": -2.41111159324646, "logits/rejected": -2.4192399978637695, "logps/chosen": -2.180694818496704, "logps/rejected": -4.056878089904785, "loss": 0.5637, "rewards/accuracies": 0.0, "rewards/chosen": 0.6429500579833984, "rewards/margins": -0.09708148241043091, "rewards/rejected": 0.7400315403938293, "step": 433 }, { "epoch": 0.78, "learning_rate": 8.65924581693905e-08, "logits/chosen": -2.30403733253479, "logits/rejected": -2.3115036487579346, "logps/chosen": -1.7304701805114746, "logps/rejected": -2.594679832458496, "loss": 0.6676, "rewards/accuracies": 1.0, "rewards/chosen": 0.6166835427284241, "rewards/margins": 0.18077325820922852, "rewards/rejected": 0.43591028451919556, "step": 434 }, { "epoch": 0.79, "learning_rate": 8.652585985188181e-08, "logits/chosen": -2.3606536388397217, "logits/rejected": -2.3609824180603027, "logps/chosen": -2.9169743061065674, "logps/rejected": -4.405336380004883, "loss": 0.4435, "rewards/accuracies": 1.0, "rewards/chosen": 0.9280312657356262, "rewards/margins": 0.3301628828048706, "rewards/rejected": 0.5978683829307556, "step": 435 }, { "epoch": 0.79, "learning_rate": 8.645912228669328e-08, "logits/chosen": -2.2427449226379395, "logits/rejected": -2.235952377319336, "logps/chosen": -1.3041014671325684, "logps/rejected": -4.856774806976318, "loss": 0.6724, "rewards/accuracies": 0.0, "rewards/chosen": 0.47874054312705994, "rewards/margins": -0.09235277771949768, "rewards/rejected": 0.5710933208465576, "step": 436 }, { "epoch": 0.79, "learning_rate": 8.63922457282488e-08, "logits/chosen": -2.263718843460083, "logits/rejected": -2.264655828475952, "logps/chosen": -2.236022710800171, "logps/rejected": -1.4906327724456787, "loss": 0.7684, "rewards/accuracies": 0.0, "rewards/chosen": 0.520986020565033, "rewards/margins": -0.001010596752166748, "rewards/rejected": 0.5219966173171997, "step": 437 }, { "epoch": 0.79, "learning_rate": 8.632523043150213e-08, "logits/chosen": -2.290466070175171, "logits/rejected": -2.2967329025268555, "logps/chosen": -2.9208810329437256, "logps/rejected": -4.240398406982422, "loss": 0.6667, "rewards/accuracies": 1.0, "rewards/chosen": 0.7197183966636658, "rewards/margins": 0.22874462604522705, "rewards/rejected": 0.4909737706184387, "step": 438 }, { "epoch": 0.79, "learning_rate": 8.625807665193597e-08, "logits/chosen": -2.3423140048980713, "logits/rejected": -2.349005937576294, "logps/chosen": -1.5439932346343994, "logps/rejected": -5.085199356079102, "loss": 0.7036, "rewards/accuracies": 0.0, "rewards/chosen": 0.6802048087120056, "rewards/margins": -0.19222629070281982, "rewards/rejected": 0.8724310994148254, "step": 439 }, { "epoch": 0.8, "learning_rate": 8.619078464556091e-08, "logits/chosen": -2.337709426879883, "logits/rejected": -2.344095230102539, "logps/chosen": -2.760448694229126, "logps/rejected": -2.662327766418457, "loss": 0.6367, "rewards/accuracies": 1.0, "rewards/chosen": 0.6890121698379517, "rewards/margins": 0.013956844806671143, "rewards/rejected": 0.6750553250312805, "step": 440 }, { "epoch": 0.8, "learning_rate": 8.612335466891455e-08, "logits/chosen": -2.2818400859832764, "logits/rejected": -2.429165840148926, "logps/chosen": -2.1970009803771973, "logps/rejected": -26.85811996459961, "loss": 0.4194, "rewards/accuracies": 1.0, "rewards/chosen": 0.670376718044281, "rewards/margins": 0.6354814171791077, "rewards/rejected": 0.03489532694220543, "step": 441 }, { "epoch": 0.8, "learning_rate": 8.605578697906047e-08, "logits/chosen": -2.2299461364746094, "logits/rejected": -2.2890076637268066, "logps/chosen": -2.2980098724365234, "logps/rejected": -23.371143341064453, "loss": 0.6135, "rewards/accuracies": 1.0, "rewards/chosen": 0.7134584784507751, "rewards/margins": 0.29145777225494385, "rewards/rejected": 0.4220007061958313, "step": 442 }, { "epoch": 0.8, "learning_rate": 8.598808183358722e-08, "logits/chosen": -2.229930877685547, "logits/rejected": -2.2331576347351074, "logps/chosen": -2.2109622955322266, "logps/rejected": -2.117919921875, "loss": 0.7511, "rewards/accuracies": 0.0, "rewards/chosen": 0.47786006331443787, "rewards/margins": -0.2237975299358368, "rewards/rejected": 0.7016575932502747, "step": 443 }, { "epoch": 0.8, "learning_rate": 8.592023949060738e-08, "logits/chosen": -2.3913707733154297, "logits/rejected": -2.4170732498168945, "logps/chosen": -2.348155975341797, "logps/rejected": -24.31256675720215, "loss": 0.7419, "rewards/accuracies": 0.0, "rewards/chosen": 0.5564479231834412, "rewards/margins": -0.0973665714263916, "rewards/rejected": 0.6538144946098328, "step": 444 }, { "epoch": 0.8, "learning_rate": 8.585226020875663e-08, "logits/chosen": -2.2968533039093018, "logits/rejected": -2.293668746948242, "logps/chosen": -1.5199291706085205, "logps/rejected": -6.512607097625732, "loss": 0.7257, "rewards/accuracies": 0.0, "rewards/chosen": 0.5374965667724609, "rewards/margins": -0.22271031141281128, "rewards/rejected": 0.7602068781852722, "step": 445 }, { "epoch": 0.81, "learning_rate": 8.578414424719259e-08, "logits/chosen": -2.3743085861206055, "logits/rejected": -2.3764331340789795, "logps/chosen": -2.4748597145080566, "logps/rejected": -9.541059494018555, "loss": 0.6751, "rewards/accuracies": 0.0, "rewards/chosen": 0.5469115376472473, "rewards/margins": -0.28596580028533936, "rewards/rejected": 0.8328773379325867, "step": 446 }, { "epoch": 0.81, "learning_rate": 8.571589186559406e-08, "logits/chosen": -2.3164191246032715, "logits/rejected": -2.3185298442840576, "logps/chosen": -2.89968204498291, "logps/rejected": -2.4430339336395264, "loss": 0.7377, "rewards/accuracies": 0.0, "rewards/chosen": 0.8619084358215332, "rewards/margins": -0.01899033784866333, "rewards/rejected": 0.8808987736701965, "step": 447 }, { "epoch": 0.81, "learning_rate": 8.564750332415984e-08, "logits/chosen": -2.377131462097168, "logits/rejected": -2.469404935836792, "logps/chosen": -2.0449674129486084, "logps/rejected": -32.01094055175781, "loss": 0.5767, "rewards/accuracies": 1.0, "rewards/chosen": 0.504690945148468, "rewards/margins": 0.744737446308136, "rewards/rejected": -0.24004650115966797, "step": 448 }, { "epoch": 0.81, "learning_rate": 8.557897888360787e-08, "logits/chosen": -2.328495502471924, "logits/rejected": -2.4117162227630615, "logps/chosen": -1.6439580917358398, "logps/rejected": -39.22455596923828, "loss": 0.64, "rewards/accuracies": 1.0, "rewards/chosen": 0.5982085466384888, "rewards/margins": 1.1926841735839844, "rewards/rejected": -0.5944755673408508, "step": 449 }, { "epoch": 0.81, "learning_rate": 8.551031880517409e-08, "logits/chosen": -2.338578939437866, "logits/rejected": -2.33711314201355, "logps/chosen": -1.250537395477295, "logps/rejected": -2.050894021987915, "loss": 0.5891, "rewards/accuracies": 0.0, "rewards/chosen": 0.6482502818107605, "rewards/margins": -0.03131914138793945, "rewards/rejected": 0.6795694231987, "step": 450 }, { "epoch": 0.82, "learning_rate": 8.544152335061164e-08, "logits/chosen": -2.3537251949310303, "logits/rejected": -2.353290319442749, "logps/chosen": -9.68940258026123, "logps/rejected": -5.14683723449707, "loss": 0.5273, "rewards/accuracies": 1.0, "rewards/chosen": 0.73237544298172, "rewards/margins": 0.2210577130317688, "rewards/rejected": 0.5113177299499512, "step": 451 }, { "epoch": 0.82, "learning_rate": 8.537259278218969e-08, "logits/chosen": -2.3217456340789795, "logits/rejected": -2.324880838394165, "logps/chosen": -7.6184587478637695, "logps/rejected": -9.359454154968262, "loss": 0.733, "rewards/accuracies": 0.0, "rewards/chosen": 0.48980018496513367, "rewards/margins": -0.3387353718280792, "rewards/rejected": 0.8285355567932129, "step": 452 }, { "epoch": 0.82, "learning_rate": 8.530352736269248e-08, "logits/chosen": -2.380451202392578, "logits/rejected": -2.4142866134643555, "logps/chosen": -5.734133720397949, "logps/rejected": -28.370351791381836, "loss": 0.6879, "rewards/accuracies": 1.0, "rewards/chosen": 0.9749341011047363, "rewards/margins": 0.37617063522338867, "rewards/rejected": 0.5987634658813477, "step": 453 }, { "epoch": 0.82, "learning_rate": 8.523432735541844e-08, "logits/chosen": -2.353832483291626, "logits/rejected": -2.3572821617126465, "logps/chosen": -2.3673644065856934, "logps/rejected": -3.685042381286621, "loss": 0.746, "rewards/accuracies": 1.0, "rewards/chosen": 0.5111852884292603, "rewards/margins": 0.06942227482795715, "rewards/rejected": 0.4417630136013031, "step": 454 }, { "epoch": 0.82, "learning_rate": 8.516499302417899e-08, "logits/chosen": -2.290919303894043, "logits/rejected": -2.2857937812805176, "logps/chosen": -12.091516494750977, "logps/rejected": -2.5864546298980713, "loss": 0.7402, "rewards/accuracies": 0.0, "rewards/chosen": 0.35117408633232117, "rewards/margins": -0.2884986698627472, "rewards/rejected": 0.6396727561950684, "step": 455 }, { "epoch": 0.82, "learning_rate": 8.50955246332977e-08, "logits/chosen": -2.2989583015441895, "logits/rejected": -2.3555734157562256, "logps/chosen": -5.075331211090088, "logps/rejected": -26.036226272583008, "loss": 0.7152, "rewards/accuracies": 1.0, "rewards/chosen": 0.7657740116119385, "rewards/margins": 0.4067951738834381, "rewards/rejected": 0.35897883772850037, "step": 456 }, { "epoch": 0.83, "learning_rate": 8.502592244760918e-08, "logits/chosen": -2.355006694793701, "logits/rejected": -2.3312294483184814, "logps/chosen": -1.845207929611206, "logps/rejected": 0.0, "loss": 0.5894, "rewards/accuracies": 1.0, "rewards/chosen": 0.642392635345459, "rewards/margins": 0.642392635345459, "rewards/rejected": 0.0, "step": 457 }, { "epoch": 0.83, "learning_rate": 8.49561867324581e-08, "logits/chosen": -2.411262273788452, "logits/rejected": -2.4157769680023193, "logps/chosen": -1.8985192775726318, "logps/rejected": -2.558507204055786, "loss": 0.6974, "rewards/accuracies": 0.0, "rewards/chosen": 0.7299262881278992, "rewards/margins": -0.01694345474243164, "rewards/rejected": 0.7468697428703308, "step": 458 }, { "epoch": 0.83, "learning_rate": 8.488631775369824e-08, "logits/chosen": -2.3265252113342285, "logits/rejected": -2.3070085048675537, "logps/chosen": -10.730001449584961, "logps/rejected": -1.7609739303588867, "loss": 0.6786, "rewards/accuracies": 1.0, "rewards/chosen": 0.8551118969917297, "rewards/margins": 0.2623644471168518, "rewards/rejected": 0.5927474498748779, "step": 459 }, { "epoch": 0.83, "learning_rate": 8.481631577769135e-08, "logits/chosen": -2.3525917530059814, "logits/rejected": -2.4826154708862305, "logps/chosen": -4.684876441955566, "logps/rejected": -32.861045837402344, "loss": 0.6904, "rewards/accuracies": 1.0, "rewards/chosen": 0.5287864804267883, "rewards/margins": 0.6916408538818359, "rewards/rejected": -0.1628543883562088, "step": 460 }, { "epoch": 0.83, "learning_rate": 8.474618107130624e-08, "logits/chosen": -2.4099559783935547, "logits/rejected": -2.4072258472442627, "logps/chosen": -2.959022283554077, "logps/rejected": -5.914085865020752, "loss": 0.5582, "rewards/accuracies": 1.0, "rewards/chosen": 0.7920923233032227, "rewards/margins": 0.23847872018814087, "rewards/rejected": 0.5536136031150818, "step": 461 }, { "epoch": 0.84, "learning_rate": 8.467591390191777e-08, "logits/chosen": -2.3094403743743896, "logits/rejected": -2.307103157043457, "logps/chosen": -4.175063133239746, "logps/rejected": -2.335190534591675, "loss": 0.5738, "rewards/accuracies": 1.0, "rewards/chosen": 0.8181538581848145, "rewards/margins": 0.3796888291835785, "rewards/rejected": 0.43846502900123596, "step": 462 }, { "epoch": 0.84, "learning_rate": 8.46055145374057e-08, "logits/chosen": -2.3663859367370605, "logits/rejected": -2.3562533855438232, "logps/chosen": -4.028813362121582, "logps/rejected": -5.929318904876709, "loss": 1.0308, "rewards/accuracies": 0.0, "rewards/chosen": 0.7516230940818787, "rewards/margins": -0.1834164261817932, "rewards/rejected": 0.9350395202636719, "step": 463 }, { "epoch": 0.84, "learning_rate": 8.453498324615382e-08, "logits/chosen": -2.2265663146972656, "logits/rejected": -2.228422164916992, "logps/chosen": -2.6276679039001465, "logps/rejected": -2.6753785610198975, "loss": 0.6922, "rewards/accuracies": 1.0, "rewards/chosen": 0.7108051776885986, "rewards/margins": 0.262042760848999, "rewards/rejected": 0.4487624168395996, "step": 464 }, { "epoch": 0.84, "learning_rate": 8.446432029704889e-08, "logits/chosen": -2.161661148071289, "logits/rejected": -2.189784526824951, "logps/chosen": -3.024606704711914, "logps/rejected": -3.3752450942993164, "loss": 0.6042, "rewards/accuracies": 1.0, "rewards/chosen": 0.7735700607299805, "rewards/margins": 0.052726149559020996, "rewards/rejected": 0.7208439111709595, "step": 465 }, { "epoch": 0.84, "learning_rate": 8.439352595947947e-08, "logits/chosen": -2.264111280441284, "logits/rejected": -2.261672258377075, "logps/chosen": -2.1310207843780518, "logps/rejected": -3.3760602474212646, "loss": 0.6164, "rewards/accuracies": 1.0, "rewards/chosen": 0.7693278193473816, "rewards/margins": 0.12682360410690308, "rewards/rejected": 0.6425042152404785, "step": 466 }, { "epoch": 0.84, "learning_rate": 8.432260050333517e-08, "logits/chosen": -2.4922780990600586, "logits/rejected": -2.4909093379974365, "logps/chosen": -11.53754711151123, "logps/rejected": -3.9719772338867188, "loss": 0.857, "rewards/accuracies": 0.0, "rewards/chosen": 0.2974833548069, "rewards/margins": -0.47725239396095276, "rewards/rejected": 0.7747357487678528, "step": 467 }, { "epoch": 0.85, "learning_rate": 8.425154419900533e-08, "logits/chosen": -2.3660309314727783, "logits/rejected": -2.362720012664795, "logps/chosen": -8.21037769317627, "logps/rejected": -2.8280699253082275, "loss": 0.793, "rewards/accuracies": 1.0, "rewards/chosen": 0.8958956599235535, "rewards/margins": 0.1083906888961792, "rewards/rejected": 0.7875049710273743, "step": 468 }, { "epoch": 0.85, "learning_rate": 8.418035731737823e-08, "logits/chosen": -2.224297523498535, "logits/rejected": -2.259957790374756, "logps/chosen": -7.249107360839844, "logps/rejected": -14.228751182556152, "loss": 0.4533, "rewards/accuracies": 1.0, "rewards/chosen": 0.6713722348213196, "rewards/margins": 0.37291213870048523, "rewards/rejected": 0.29846009612083435, "step": 469 }, { "epoch": 0.85, "learning_rate": 8.410904012983984e-08, "logits/chosen": -2.227832078933716, "logits/rejected": -2.232987880706787, "logps/chosen": -6.1386213302612305, "logps/rejected": -2.870783567428589, "loss": 0.5644, "rewards/accuracies": 1.0, "rewards/chosen": 1.151962399482727, "rewards/margins": 0.36907273530960083, "rewards/rejected": 0.7828896641731262, "step": 470 }, { "epoch": 0.85, "learning_rate": 8.403759290827301e-08, "logits/chosen": -2.2632460594177246, "logits/rejected": -2.4021854400634766, "logps/chosen": -2.597642183303833, "logps/rejected": -35.29798126220703, "loss": 0.5894, "rewards/accuracies": 1.0, "rewards/chosen": 0.7044054865837097, "rewards/margins": 0.23302409052848816, "rewards/rejected": 0.47138139605522156, "step": 471 }, { "epoch": 0.85, "learning_rate": 8.396601592505625e-08, "logits/chosen": -2.2759037017822266, "logits/rejected": -2.2865335941314697, "logps/chosen": -2.0516929626464844, "logps/rejected": -3.2593016624450684, "loss": 0.8614, "rewards/accuracies": 0.0, "rewards/chosen": 0.5589420199394226, "rewards/margins": -0.04079693555831909, "rewards/rejected": 0.5997389554977417, "step": 472 }, { "epoch": 0.86, "learning_rate": 8.389430945306278e-08, "logits/chosen": -2.341581344604492, "logits/rejected": -2.3424692153930664, "logps/chosen": -1.5381220579147339, "logps/rejected": -2.3141276836395264, "loss": 0.6651, "rewards/accuracies": 1.0, "rewards/chosen": 0.6019555330276489, "rewards/margins": 0.011537253856658936, "rewards/rejected": 0.59041827917099, "step": 473 }, { "epoch": 0.86, "learning_rate": 8.382247376565943e-08, "logits/chosen": -2.2269790172576904, "logits/rejected": -2.231123685836792, "logps/chosen": -5.775473594665527, "logps/rejected": -1.972064733505249, "loss": 0.64, "rewards/accuracies": 1.0, "rewards/chosen": 0.5976645350456238, "rewards/margins": 0.06915760040283203, "rewards/rejected": 0.5285069346427917, "step": 474 }, { "epoch": 0.86, "learning_rate": 8.375050913670572e-08, "logits/chosen": -2.373014211654663, "logits/rejected": -2.376795530319214, "logps/chosen": -2.433176279067993, "logps/rejected": -3.134866237640381, "loss": 0.6965, "rewards/accuracies": 1.0, "rewards/chosen": 0.8374902606010437, "rewards/margins": 0.12151938676834106, "rewards/rejected": 0.7159708738327026, "step": 475 }, { "epoch": 0.86, "learning_rate": 8.367841584055267e-08, "logits/chosen": -2.2921502590179443, "logits/rejected": -2.289578437805176, "logps/chosen": -2.474050998687744, "logps/rejected": -2.0431697368621826, "loss": 0.694, "rewards/accuracies": 0.0, "rewards/chosen": 0.4689173698425293, "rewards/margins": -0.1276819109916687, "rewards/rejected": 0.596599280834198, "step": 476 }, { "epoch": 0.86, "learning_rate": 8.360619415204182e-08, "logits/chosen": -2.313812017440796, "logits/rejected": -2.377511501312256, "logps/chosen": -1.5976742506027222, "logps/rejected": -31.478778839111328, "loss": 0.4528, "rewards/accuracies": 1.0, "rewards/chosen": 0.8177399039268494, "rewards/margins": 0.7866684198379517, "rewards/rejected": 0.03107147291302681, "step": 477 }, { "epoch": 0.86, "learning_rate": 8.353384434650424e-08, "logits/chosen": -2.3129398822784424, "logits/rejected": -2.284158945083618, "logps/chosen": -1.543617606163025, "logps/rejected": 0.0, "loss": 0.5107, "rewards/accuracies": 1.0, "rewards/chosen": 0.7086973786354065, "rewards/margins": 0.7086973786354065, "rewards/rejected": 0.0, "step": 478 }, { "epoch": 0.87, "learning_rate": 8.346136669975935e-08, "logits/chosen": -2.296142578125, "logits/rejected": -2.302316904067993, "logps/chosen": -1.425888180732727, "logps/rejected": -2.7161052227020264, "loss": 0.6592, "rewards/accuracies": 1.0, "rewards/chosen": 0.6632410883903503, "rewards/margins": 0.025359511375427246, "rewards/rejected": 0.6378815770149231, "step": 479 }, { "epoch": 0.87, "learning_rate": 8.338876148811399e-08, "logits/chosen": -2.347168445587158, "logits/rejected": -2.3248839378356934, "logps/chosen": -4.143003940582275, "logps/rejected": 0.0, "loss": 0.4371, "rewards/accuracies": 1.0, "rewards/chosen": 0.657359778881073, "rewards/margins": 0.657359778881073, "rewards/rejected": 0.0, "step": 480 }, { "epoch": 0.87, "learning_rate": 8.331602898836125e-08, "logits/chosen": -2.2572405338287354, "logits/rejected": -2.359927177429199, "logps/chosen": -3.750068187713623, "logps/rejected": -33.6565055847168, "loss": 0.4893, "rewards/accuracies": 1.0, "rewards/chosen": 0.5193369388580322, "rewards/margins": 0.7566379904747009, "rewards/rejected": -0.2373010665178299, "step": 481 }, { "epoch": 0.87, "learning_rate": 8.324316947777958e-08, "logits/chosen": -2.3179421424865723, "logits/rejected": -2.2987611293792725, "logps/chosen": -8.14925765991211, "logps/rejected": 0.0, "loss": 0.7006, "rewards/accuracies": 1.0, "rewards/chosen": 0.41558724641799927, "rewards/margins": 0.41558724641799927, "rewards/rejected": 0.0, "step": 482 }, { "epoch": 0.87, "learning_rate": 8.317018323413156e-08, "logits/chosen": -2.382046937942505, "logits/rejected": -2.3757290840148926, "logps/chosen": -3.5222043991088867, "logps/rejected": -12.587602615356445, "loss": 0.9671, "rewards/accuracies": 1.0, "rewards/chosen": 0.6568706631660461, "rewards/margins": 0.22234821319580078, "rewards/rejected": 0.43452244997024536, "step": 483 }, { "epoch": 0.88, "learning_rate": 8.309707053566293e-08, "logits/chosen": -2.2972095012664795, "logits/rejected": -2.300676107406616, "logps/chosen": -5.422080993652344, "logps/rejected": -2.7033474445343018, "loss": 0.6404, "rewards/accuracies": 1.0, "rewards/chosen": 0.8042384386062622, "rewards/margins": 0.21744650602340698, "rewards/rejected": 0.5867919325828552, "step": 484 }, { "epoch": 0.88, "learning_rate": 8.302383166110151e-08, "logits/chosen": -2.3999476432800293, "logits/rejected": -2.4004266262054443, "logps/chosen": -10.52985668182373, "logps/rejected": -5.433956146240234, "loss": 0.7457, "rewards/accuracies": 0.0, "rewards/chosen": 0.6034998893737793, "rewards/margins": -0.083668053150177, "rewards/rejected": 0.6871679425239563, "step": 485 }, { "epoch": 0.88, "learning_rate": 8.29504668896562e-08, "logits/chosen": -2.288790464401245, "logits/rejected": -2.2803008556365967, "logps/chosen": -5.045779705047607, "logps/rejected": -6.202271461486816, "loss": 0.724, "rewards/accuracies": 1.0, "rewards/chosen": 0.6621930003166199, "rewards/margins": 0.3072545826435089, "rewards/rejected": 0.35493841767311096, "step": 486 }, { "epoch": 0.88, "learning_rate": 8.287697650101574e-08, "logits/chosen": -2.367872953414917, "logits/rejected": -2.3669230937957764, "logps/chosen": -1.8511890172958374, "logps/rejected": -5.622732162475586, "loss": 0.5244, "rewards/accuracies": 1.0, "rewards/chosen": 0.6834405660629272, "rewards/margins": 0.3178577125072479, "rewards/rejected": 0.3655828535556793, "step": 487 }, { "epoch": 0.88, "learning_rate": 8.280336077534786e-08, "logits/chosen": -2.3320975303649902, "logits/rejected": -2.3786873817443848, "logps/chosen": -2.4046669006347656, "logps/rejected": -22.118927001953125, "loss": 0.4392, "rewards/accuracies": 1.0, "rewards/chosen": 0.6338962912559509, "rewards/margins": 0.5456383228302002, "rewards/rejected": 0.08825798332691193, "step": 488 }, { "epoch": 0.88, "learning_rate": 8.272961999329809e-08, "logits/chosen": -2.274081230163574, "logits/rejected": -2.276101589202881, "logps/chosen": -1.0005460977554321, "logps/rejected": -8.38158130645752, "loss": 0.8692, "rewards/accuracies": 0.0, "rewards/chosen": 0.5216559767723083, "rewards/margins": -0.4256652593612671, "rewards/rejected": 0.9473212361335754, "step": 489 }, { "epoch": 0.89, "learning_rate": 8.26557544359887e-08, "logits/chosen": -2.3305492401123047, "logits/rejected": -2.334022045135498, "logps/chosen": -1.7172592878341675, "logps/rejected": -1.6460418701171875, "loss": 0.6965, "rewards/accuracies": 1.0, "rewards/chosen": 0.6210407614707947, "rewards/margins": 0.09180861711502075, "rewards/rejected": 0.5292321443557739, "step": 490 }, { "epoch": 0.89, "learning_rate": 8.258176438501764e-08, "logits/chosen": -2.3327736854553223, "logits/rejected": -2.3392961025238037, "logps/chosen": -2.5986227989196777, "logps/rejected": -13.942705154418945, "loss": 0.634, "rewards/accuracies": 0.0, "rewards/chosen": 0.5844990611076355, "rewards/margins": -0.16810238361358643, "rewards/rejected": 0.7526014447212219, "step": 491 }, { "epoch": 0.89, "learning_rate": 8.250765012245746e-08, "logits/chosen": -2.3228657245635986, "logits/rejected": -2.293567657470703, "logps/chosen": -2.0770788192749023, "logps/rejected": 0.0, "loss": 0.4901, "rewards/accuracies": 1.0, "rewards/chosen": 0.6812331080436707, "rewards/margins": 0.6812331080436707, "rewards/rejected": 0.0, "step": 492 }, { "epoch": 0.89, "learning_rate": 8.24334119308543e-08, "logits/chosen": -2.2514355182647705, "logits/rejected": -2.255380153656006, "logps/chosen": -2.4559473991394043, "logps/rejected": -9.793612480163574, "loss": 0.7177, "rewards/accuracies": 0.0, "rewards/chosen": 0.5612048506736755, "rewards/margins": -0.2774181365966797, "rewards/rejected": 0.8386229872703552, "step": 493 }, { "epoch": 0.89, "learning_rate": 8.235905009322667e-08, "logits/chosen": -2.3701114654541016, "logits/rejected": -2.387726068496704, "logps/chosen": -4.092512130737305, "logps/rejected": -9.432184219360352, "loss": 0.6419, "rewards/accuracies": 1.0, "rewards/chosen": 0.7419951558113098, "rewards/margins": 0.36752063035964966, "rewards/rejected": 0.37447452545166016, "step": 494 }, { "epoch": 0.9, "learning_rate": 8.228456489306451e-08, "logits/chosen": -2.468031883239746, "logits/rejected": -2.405552864074707, "logps/chosen": -20.1702823638916, "logps/rejected": -3.0794882774353027, "loss": 0.7239, "rewards/accuracies": 0.0, "rewards/chosen": -0.031049346551299095, "rewards/margins": -0.6380704045295715, "rewards/rejected": 0.6070210337638855, "step": 495 }, { "epoch": 0.9, "learning_rate": 8.220995661432804e-08, "logits/chosen": -2.330991506576538, "logits/rejected": -2.3344686031341553, "logps/chosen": -1.4200913906097412, "logps/rejected": -1.563180923461914, "loss": 0.6241, "rewards/accuracies": 1.0, "rewards/chosen": 0.5826465487480164, "rewards/margins": 0.10309681296348572, "rewards/rejected": 0.47954973578453064, "step": 496 }, { "epoch": 0.9, "learning_rate": 8.213522554144672e-08, "logits/chosen": -2.468806505203247, "logits/rejected": -2.4290459156036377, "logps/chosen": -7.425769805908203, "logps/rejected": -1.9977989196777344, "loss": 0.7584, "rewards/accuracies": 1.0, "rewards/chosen": 0.7565769553184509, "rewards/margins": 0.1128990650177002, "rewards/rejected": 0.6436778903007507, "step": 497 }, { "epoch": 0.9, "learning_rate": 8.206037195931807e-08, "logits/chosen": -2.441392421722412, "logits/rejected": -2.4411909580230713, "logps/chosen": -2.4106380939483643, "logps/rejected": -1.948134422302246, "loss": 0.5412, "rewards/accuracies": 1.0, "rewards/chosen": 0.6883118748664856, "rewards/margins": 0.002552628517150879, "rewards/rejected": 0.6857592463493347, "step": 498 }, { "epoch": 0.9, "learning_rate": 8.198539615330674e-08, "logits/chosen": -2.356297492980957, "logits/rejected": -2.332263708114624, "logps/chosen": -1.3842496871948242, "logps/rejected": 0.0, "loss": 0.5768, "rewards/accuracies": 1.0, "rewards/chosen": 0.5973777770996094, "rewards/margins": 0.5973777770996094, "rewards/rejected": 0.0, "step": 499 }, { "epoch": 0.9, "learning_rate": 8.191029840924326e-08, "logits/chosen": -2.2155873775482178, "logits/rejected": -2.203841209411621, "logps/chosen": -2.1415162086486816, "logps/rejected": -3.5983080863952637, "loss": 0.6101, "rewards/accuracies": 1.0, "rewards/chosen": 0.6756353974342346, "rewards/margins": 0.1110803484916687, "rewards/rejected": 0.5645550489425659, "step": 500 }, { "epoch": 0.91, "learning_rate": 8.183507901342308e-08, "logits/chosen": -2.313420295715332, "logits/rejected": -2.3148117065429688, "logps/chosen": -1.6667224168777466, "logps/rejected": -1.2839182615280151, "loss": 0.618, "rewards/accuracies": 1.0, "rewards/chosen": 0.7105202078819275, "rewards/margins": 0.1683397889137268, "rewards/rejected": 0.5421804189682007, "step": 501 }, { "epoch": 0.91, "learning_rate": 8.175973825260538e-08, "logits/chosen": -2.1889700889587402, "logits/rejected": -2.2321131229400635, "logps/chosen": -9.015246391296387, "logps/rejected": -18.708221435546875, "loss": 0.5441, "rewards/accuracies": 1.0, "rewards/chosen": 0.764990508556366, "rewards/margins": 0.2590038776397705, "rewards/rejected": 0.5059866309165955, "step": 502 }, { "epoch": 0.91, "learning_rate": 8.168427641401205e-08, "logits/chosen": -2.3424441814422607, "logits/rejected": -2.3540141582489014, "logps/chosen": -2.530961513519287, "logps/rejected": -6.33937931060791, "loss": 0.7237, "rewards/accuracies": 1.0, "rewards/chosen": 0.6442719101905823, "rewards/margins": 0.21858057379722595, "rewards/rejected": 0.4256913363933563, "step": 503 }, { "epoch": 0.91, "learning_rate": 8.160869378532656e-08, "logits/chosen": -2.28129243850708, "logits/rejected": -2.2804532051086426, "logps/chosen": -3.4658043384552, "logps/rejected": -3.836188554763794, "loss": 0.5825, "rewards/accuracies": 1.0, "rewards/chosen": 0.7618932723999023, "rewards/margins": 0.27939510345458984, "rewards/rejected": 0.4824981689453125, "step": 504 }, { "epoch": 0.91, "learning_rate": 8.153299065469286e-08, "logits/chosen": -2.4116568565368652, "logits/rejected": -2.4087936878204346, "logps/chosen": -2.540421724319458, "logps/rejected": -7.924430847167969, "loss": 0.7071, "rewards/accuracies": 0.0, "rewards/chosen": 0.7455195784568787, "rewards/margins": -0.14415287971496582, "rewards/rejected": 0.8896724581718445, "step": 505 }, { "epoch": 0.92, "learning_rate": 8.145716731071429e-08, "logits/chosen": -2.1943163871765137, "logits/rejected": -2.18709135055542, "logps/chosen": -1.5935261249542236, "logps/rejected": -6.912136554718018, "loss": 0.6341, "rewards/accuracies": 0.0, "rewards/chosen": 0.6216203570365906, "rewards/margins": -0.21734946966171265, "rewards/rejected": 0.8389698266983032, "step": 506 }, { "epoch": 0.92, "learning_rate": 8.138122404245248e-08, "logits/chosen": -2.402811288833618, "logits/rejected": -2.396850347518921, "logps/chosen": -2.6743764877319336, "logps/rejected": -5.107244491577148, "loss": 0.7218, "rewards/accuracies": 0.0, "rewards/chosen": 0.551629364490509, "rewards/margins": -0.31981879472732544, "rewards/rejected": 0.8714481592178345, "step": 507 }, { "epoch": 0.92, "learning_rate": 8.130516113942626e-08, "logits/chosen": -2.2915735244750977, "logits/rejected": -2.287696361541748, "logps/chosen": -9.48514461517334, "logps/rejected": -4.669517993927002, "loss": 0.6863, "rewards/accuracies": 1.0, "rewards/chosen": 0.4761224687099457, "rewards/margins": 0.0920267403125763, "rewards/rejected": 0.3840957283973694, "step": 508 }, { "epoch": 0.92, "learning_rate": 8.122897889161054e-08, "logits/chosen": -2.341525077819824, "logits/rejected": -2.31815242767334, "logps/chosen": -3.836301565170288, "logps/rejected": 0.0, "loss": 0.4962, "rewards/accuracies": 1.0, "rewards/chosen": 0.9307789206504822, "rewards/margins": 0.9307789206504822, "rewards/rejected": 0.0, "step": 509 }, { "epoch": 0.92, "learning_rate": 8.11526775894352e-08, "logits/chosen": -2.2823433876037598, "logits/rejected": -2.254686117172241, "logps/chosen": -4.549769401550293, "logps/rejected": -3.0525779724121094, "loss": 0.6651, "rewards/accuracies": 1.0, "rewards/chosen": 0.7549411654472351, "rewards/margins": 0.06893783807754517, "rewards/rejected": 0.6860033273696899, "step": 510 }, { "epoch": 0.92, "learning_rate": 8.107625752378397e-08, "logits/chosen": -2.26096773147583, "logits/rejected": -2.263550281524658, "logps/chosen": -1.9729013442993164, "logps/rejected": -2.9663264751434326, "loss": 0.7148, "rewards/accuracies": 1.0, "rewards/chosen": 0.6142634749412537, "rewards/margins": 0.06360101699829102, "rewards/rejected": 0.5506624579429626, "step": 511 }, { "epoch": 0.93, "learning_rate": 8.099971898599342e-08, "logits/chosen": -2.281559705734253, "logits/rejected": -2.2704052925109863, "logps/chosen": -4.407376289367676, "logps/rejected": -9.903799057006836, "loss": 0.6782, "rewards/accuracies": 1.0, "rewards/chosen": 0.709592342376709, "rewards/margins": 0.060570597648620605, "rewards/rejected": 0.6490217447280884, "step": 512 }, { "epoch": 0.93, "learning_rate": 8.092306226785169e-08, "logits/chosen": -2.4508068561553955, "logits/rejected": -2.451878070831299, "logps/chosen": -2.68212628364563, "logps/rejected": -2.7861428260803223, "loss": 0.7391, "rewards/accuracies": 0.0, "rewards/chosen": 0.7173762321472168, "rewards/margins": -0.05357092618942261, "rewards/rejected": 0.7709471583366394, "step": 513 }, { "epoch": 0.93, "learning_rate": 8.084628766159748e-08, "logits/chosen": -2.249962329864502, "logits/rejected": -2.230384111404419, "logps/chosen": -6.94966459274292, "logps/rejected": -2.2102906703948975, "loss": 0.5511, "rewards/accuracies": 1.0, "rewards/chosen": 0.923151969909668, "rewards/margins": 0.37761497497558594, "rewards/rejected": 0.545536994934082, "step": 514 }, { "epoch": 0.93, "learning_rate": 8.076939545991895e-08, "logits/chosen": -2.356315851211548, "logits/rejected": -2.3569071292877197, "logps/chosen": -11.045951843261719, "logps/rejected": -4.337956428527832, "loss": 0.6488, "rewards/accuracies": 1.0, "rewards/chosen": 0.8635292053222656, "rewards/margins": 0.19581842422485352, "rewards/rejected": 0.6677107810974121, "step": 515 }, { "epoch": 0.93, "learning_rate": 8.069238595595252e-08, "logits/chosen": -2.2303383350372314, "logits/rejected": -2.2026147842407227, "logps/chosen": -2.266986608505249, "logps/rejected": 0.0, "loss": 0.5474, "rewards/accuracies": 1.0, "rewards/chosen": 0.5446974635124207, "rewards/margins": 0.5446974635124207, "rewards/rejected": 0.0, "step": 516 }, { "epoch": 0.93, "learning_rate": 8.061525944328183e-08, "logits/chosen": -2.3568034172058105, "logits/rejected": -2.362936496734619, "logps/chosen": -2.169628381729126, "logps/rejected": -2.1629655361175537, "loss": 0.7497, "rewards/accuracies": 0.0, "rewards/chosen": 0.4139406383037567, "rewards/margins": -0.11630228161811829, "rewards/rejected": 0.530242919921875, "step": 517 }, { "epoch": 0.94, "learning_rate": 8.05380162159366e-08, "logits/chosen": -2.3647329807281494, "logits/rejected": -2.370271682739258, "logps/chosen": -4.543652534484863, "logps/rejected": -11.684011459350586, "loss": 0.5586, "rewards/accuracies": 1.0, "rewards/chosen": 0.8687871098518372, "rewards/margins": 0.45391082763671875, "rewards/rejected": 0.4148762822151184, "step": 518 }, { "epoch": 0.94, "learning_rate": 8.046065656839151e-08, "logits/chosen": -2.3205010890960693, "logits/rejected": -2.325408458709717, "logps/chosen": -2.224918842315674, "logps/rejected": -11.469955444335938, "loss": 0.5869, "rewards/accuracies": 1.0, "rewards/chosen": 0.7597413659095764, "rewards/margins": 0.2799512445926666, "rewards/rejected": 0.4797901213169098, "step": 519 }, { "epoch": 0.94, "learning_rate": 8.038318079556499e-08, "logits/chosen": -2.46916127204895, "logits/rejected": -2.4739465713500977, "logps/chosen": -4.2416276931762695, "logps/rejected": -8.804232597351074, "loss": 0.5188, "rewards/accuracies": 1.0, "rewards/chosen": 0.8490027785301208, "rewards/margins": 0.4557436406612396, "rewards/rejected": 0.3932591378688812, "step": 520 }, { "epoch": 0.94, "learning_rate": 8.03055891928183e-08, "logits/chosen": -2.3877511024475098, "logits/rejected": -2.3802175521850586, "logps/chosen": -3.5414276123046875, "logps/rejected": -3.3508424758911133, "loss": 0.6835, "rewards/accuracies": 1.0, "rewards/chosen": 1.073112964630127, "rewards/margins": 0.28930026292800903, "rewards/rejected": 0.7838127017021179, "step": 521 }, { "epoch": 0.94, "learning_rate": 8.022788205595418e-08, "logits/chosen": -2.3693556785583496, "logits/rejected": -2.3714091777801514, "logps/chosen": -2.566821813583374, "logps/rejected": -8.580557823181152, "loss": 0.7936, "rewards/accuracies": 0.0, "rewards/chosen": 0.5421335101127625, "rewards/margins": -0.3680373430252075, "rewards/rejected": 0.91017085313797, "step": 522 }, { "epoch": 0.95, "learning_rate": 8.015005968121585e-08, "logits/chosen": -2.3048994541168213, "logits/rejected": -2.299865961074829, "logps/chosen": -3.29521107673645, "logps/rejected": -2.371127128601074, "loss": 0.6709, "rewards/accuracies": 0.0, "rewards/chosen": 0.6763896346092224, "rewards/margins": -0.053638994693756104, "rewards/rejected": 0.7300286293029785, "step": 523 }, { "epoch": 0.95, "learning_rate": 8.007212236528588e-08, "logits/chosen": -2.3933217525482178, "logits/rejected": -2.3902275562286377, "logps/chosen": -1.798630952835083, "logps/rejected": -9.30685043334961, "loss": 0.6487, "rewards/accuracies": 0.0, "rewards/chosen": 0.7570669054985046, "rewards/margins": -0.028039216995239258, "rewards/rejected": 0.7851061224937439, "step": 524 }, { "epoch": 0.95, "learning_rate": 7.999407040528499e-08, "logits/chosen": -2.5081980228424072, "logits/rejected": -2.505664348602295, "logps/chosen": -1.768647313117981, "logps/rejected": -3.932931661605835, "loss": 0.5742, "rewards/accuracies": 1.0, "rewards/chosen": 0.6481763124465942, "rewards/margins": 0.06727457046508789, "rewards/rejected": 0.5809017419815063, "step": 525 }, { "epoch": 0.95, "learning_rate": 7.991590409877098e-08, "logits/chosen": -2.2610771656036377, "logits/rejected": -2.25458025932312, "logps/chosen": -1.496669888496399, "logps/rejected": -5.8104987144470215, "loss": 0.6623, "rewards/accuracies": 1.0, "rewards/chosen": 0.6519613265991211, "rewards/margins": 0.014405310153961182, "rewards/rejected": 0.6375560164451599, "step": 526 }, { "epoch": 0.95, "learning_rate": 7.983762374373757e-08, "logits/chosen": -2.25342059135437, "logits/rejected": -2.260056257247925, "logps/chosen": -5.046110153198242, "logps/rejected": -2.659569501876831, "loss": 0.4945, "rewards/accuracies": 1.0, "rewards/chosen": 0.6515960693359375, "rewards/margins": 0.10751664638519287, "rewards/rejected": 0.5440794229507446, "step": 527 }, { "epoch": 0.95, "learning_rate": 7.975922963861325e-08, "logits/chosen": -2.3424181938171387, "logits/rejected": -2.338346242904663, "logps/chosen": -3.6251251697540283, "logps/rejected": -4.226137638092041, "loss": 0.7165, "rewards/accuracies": 0.0, "rewards/chosen": 0.5760512351989746, "rewards/margins": -0.05884432792663574, "rewards/rejected": 0.6348955631256104, "step": 528 }, { "epoch": 0.96, "learning_rate": 7.968072208226022e-08, "logits/chosen": -2.2487967014312744, "logits/rejected": -2.2456836700439453, "logps/chosen": -4.542071342468262, "logps/rejected": -2.821300506591797, "loss": 0.5487, "rewards/accuracies": 0.0, "rewards/chosen": 0.537803590297699, "rewards/margins": -0.09220224618911743, "rewards/rejected": 0.6300058364868164, "step": 529 }, { "epoch": 0.96, "learning_rate": 7.96021013739731e-08, "logits/chosen": -2.4165701866149902, "logits/rejected": -2.39434814453125, "logps/chosen": -1.8229687213897705, "logps/rejected": 0.0, "loss": 0.584, "rewards/accuracies": 1.0, "rewards/chosen": 0.6814175844192505, "rewards/margins": 0.6814175844192505, "rewards/rejected": 0.0, "step": 530 }, { "epoch": 0.96, "learning_rate": 7.952336781347796e-08, "logits/chosen": -2.3142333030700684, "logits/rejected": -2.3122920989990234, "logps/chosen": -1.2039278745651245, "logps/rejected": -1.971031904220581, "loss": 0.6053, "rewards/accuracies": 1.0, "rewards/chosen": 0.5940130352973938, "rewards/margins": 0.0662921667098999, "rewards/rejected": 0.5277208685874939, "step": 531 }, { "epoch": 0.96, "learning_rate": 7.944452170093104e-08, "logits/chosen": -2.3698716163635254, "logits/rejected": -2.464163303375244, "logps/chosen": -1.1470363140106201, "logps/rejected": -40.054039001464844, "loss": 0.7135, "rewards/accuracies": 0.0, "rewards/chosen": 0.5526334643363953, "rewards/margins": -0.07964193820953369, "rewards/rejected": 0.632275402545929, "step": 532 }, { "epoch": 0.96, "learning_rate": 7.93655633369177e-08, "logits/chosen": -2.2550861835479736, "logits/rejected": -2.261810302734375, "logps/chosen": -1.70745849609375, "logps/rejected": -2.316908836364746, "loss": 0.6613, "rewards/accuracies": 1.0, "rewards/chosen": 0.6619736552238464, "rewards/margins": 0.1316368579864502, "rewards/rejected": 0.5303367972373962, "step": 533 }, { "epoch": 0.97, "learning_rate": 7.928649302245123e-08, "logits/chosen": -2.2357232570648193, "logits/rejected": -2.204946994781494, "logps/chosen": -2.3372256755828857, "logps/rejected": 0.0, "loss": 0.5472, "rewards/accuracies": 1.0, "rewards/chosen": 0.7060582041740417, "rewards/margins": 0.7060582041740417, "rewards/rejected": 0.0, "step": 534 }, { "epoch": 0.97, "learning_rate": 7.920731105897168e-08, "logits/chosen": -2.3413281440734863, "logits/rejected": -2.4531941413879395, "logps/chosen": -2.697225332260132, "logps/rejected": -24.565868377685547, "loss": 0.5403, "rewards/accuracies": 1.0, "rewards/chosen": 0.5870819091796875, "rewards/margins": 0.5993854403495789, "rewards/rejected": -0.012303543277084827, "step": 535 }, { "epoch": 0.97, "learning_rate": 7.912801774834478e-08, "logits/chosen": -2.3406548500061035, "logits/rejected": -2.34259033203125, "logps/chosen": -1.8033491373062134, "logps/rejected": -2.418179750442505, "loss": 0.8418, "rewards/accuracies": 1.0, "rewards/chosen": 0.5637046098709106, "rewards/margins": 0.038602352142333984, "rewards/rejected": 0.5251022577285767, "step": 536 }, { "epoch": 0.97, "learning_rate": 7.904861339286073e-08, "logits/chosen": -2.3515431880950928, "logits/rejected": -2.3566160202026367, "logps/chosen": -1.669044017791748, "logps/rejected": -11.484231948852539, "loss": 0.4944, "rewards/accuracies": 0.0, "rewards/chosen": 0.6690577268600464, "rewards/margins": -0.16620784997940063, "rewards/rejected": 0.835265576839447, "step": 537 }, { "epoch": 0.97, "learning_rate": 7.896909829523306e-08, "logits/chosen": -2.3175578117370605, "logits/rejected": -2.399030923843384, "logps/chosen": -3.9919557571411133, "logps/rejected": -22.60215950012207, "loss": 0.5067, "rewards/accuracies": 1.0, "rewards/chosen": 0.7094721794128418, "rewards/margins": 0.9370366334915161, "rewards/rejected": -0.22756443917751312, "step": 538 }, { "epoch": 0.97, "learning_rate": 7.88894727585975e-08, "logits/chosen": -2.2991268634796143, "logits/rejected": -2.309354305267334, "logps/chosen": -2.393071174621582, "logps/rejected": -13.32136344909668, "loss": 0.5075, "rewards/accuracies": 1.0, "rewards/chosen": 0.9537087678909302, "rewards/margins": 0.7673822641372681, "rewards/rejected": 0.1863265037536621, "step": 539 }, { "epoch": 0.98, "learning_rate": 7.880973708651077e-08, "logits/chosen": -2.268889904022217, "logits/rejected": -2.2714273929595947, "logps/chosen": -1.9208980798721313, "logps/rejected": -2.4328110218048096, "loss": 0.6947, "rewards/accuracies": 0.0, "rewards/chosen": 0.5344640612602234, "rewards/margins": -0.1902359127998352, "rewards/rejected": 0.7246999740600586, "step": 540 }, { "epoch": 0.98, "learning_rate": 7.872989158294953e-08, "logits/chosen": -2.367717981338501, "logits/rejected": -2.412968158721924, "logps/chosen": -3.6910934448242188, "logps/rejected": -28.13687515258789, "loss": 0.5268, "rewards/accuracies": 1.0, "rewards/chosen": 0.6568046808242798, "rewards/margins": 1.1071703433990479, "rewards/rejected": -0.4503656327724457, "step": 541 }, { "epoch": 0.98, "learning_rate": 7.864993655230909e-08, "logits/chosen": -2.1785428524017334, "logits/rejected": -2.180279016494751, "logps/chosen": -1.896374225616455, "logps/rejected": -10.818632125854492, "loss": 0.9811, "rewards/accuracies": 0.0, "rewards/chosen": 0.5705410242080688, "rewards/margins": -0.30453604459762573, "rewards/rejected": 0.8750770688056946, "step": 542 }, { "epoch": 0.98, "learning_rate": 7.856987229940233e-08, "logits/chosen": -2.243178129196167, "logits/rejected": -2.243170976638794, "logps/chosen": -2.475327968597412, "logps/rejected": -3.0354132652282715, "loss": 0.6253, "rewards/accuracies": 1.0, "rewards/chosen": 0.6372696757316589, "rewards/margins": 0.10174256563186646, "rewards/rejected": 0.5355271100997925, "step": 543 }, { "epoch": 0.98, "learning_rate": 7.848969912945853e-08, "logits/chosen": -2.1727001667022705, "logits/rejected": -2.1748101711273193, "logps/chosen": -0.8205885291099548, "logps/rejected": -6.214556694030762, "loss": 0.7128, "rewards/accuracies": 0.0, "rewards/chosen": 0.49689197540283203, "rewards/margins": -0.20434105396270752, "rewards/rejected": 0.7012330293655396, "step": 544 }, { "epoch": 0.99, "learning_rate": 7.840941734812214e-08, "logits/chosen": -2.352637767791748, "logits/rejected": -2.5003604888916016, "logps/chosen": -10.702959060668945, "logps/rejected": -30.60585594177246, "loss": 0.634, "rewards/accuracies": 1.0, "rewards/chosen": 0.36141157150268555, "rewards/margins": 0.07741251587867737, "rewards/rejected": 0.2839990556240082, "step": 545 }, { "epoch": 0.99, "learning_rate": 7.832902726145176e-08, "logits/chosen": -2.309832811355591, "logits/rejected": -2.3013086318969727, "logps/chosen": -8.003857612609863, "logps/rejected": -2.9479193687438965, "loss": 0.6475, "rewards/accuracies": 1.0, "rewards/chosen": 0.6502047777175903, "rewards/margins": 0.08657336235046387, "rewards/rejected": 0.5636314153671265, "step": 546 }, { "epoch": 0.99, "learning_rate": 7.824852917591882e-08, "logits/chosen": -2.2902135848999023, "logits/rejected": -2.285554885864258, "logps/chosen": -1.8321255445480347, "logps/rejected": -2.1092803478240967, "loss": 0.5712, "rewards/accuracies": 0.0, "rewards/chosen": 0.5881416201591492, "rewards/margins": -0.1759352684020996, "rewards/rejected": 0.7640768885612488, "step": 547 }, { "epoch": 0.99, "learning_rate": 7.816792339840646e-08, "logits/chosen": -2.3374977111816406, "logits/rejected": -2.3391165733337402, "logps/chosen": -2.2589476108551025, "logps/rejected": -1.5182600021362305, "loss": 0.7421, "rewards/accuracies": 1.0, "rewards/chosen": 0.5011691451072693, "rewards/margins": 0.0007469058036804199, "rewards/rejected": 0.5004222393035889, "step": 548 }, { "epoch": 0.99, "learning_rate": 7.808721023620846e-08, "logits/chosen": -2.3510499000549316, "logits/rejected": -2.3485891819000244, "logps/chosen": -4.383586883544922, "logps/rejected": -2.605886697769165, "loss": 0.6482, "rewards/accuracies": 1.0, "rewards/chosen": 0.6018646359443665, "rewards/margins": 0.15009689331054688, "rewards/rejected": 0.4517677426338196, "step": 549 }, { "epoch": 0.99, "learning_rate": 7.800638999702788e-08, "logits/chosen": -2.382936954498291, "logits/rejected": -2.3838770389556885, "logps/chosen": -1.5971882343292236, "logps/rejected": -2.5395283699035645, "loss": 0.6378, "rewards/accuracies": 0.0, "rewards/chosen": 0.5625501871109009, "rewards/margins": -0.006048977375030518, "rewards/rejected": 0.5685991644859314, "step": 550 }, { "epoch": 1.0, "learning_rate": 7.792546298897607e-08, "logits/chosen": -2.2894985675811768, "logits/rejected": -2.2947652339935303, "logps/chosen": -5.404603004455566, "logps/rejected": -5.197310447692871, "loss": 0.6703, "rewards/accuracies": 1.0, "rewards/chosen": 0.5047724843025208, "rewards/margins": 0.09039995074272156, "rewards/rejected": 0.4143725335597992, "step": 551 }, { "epoch": 1.0, "learning_rate": 7.784442952057135e-08, "logits/chosen": -2.3339083194732666, "logits/rejected": -2.3928983211517334, "logps/chosen": -1.9463527202606201, "logps/rejected": -30.176387786865234, "loss": 0.5741, "rewards/accuracies": 1.0, "rewards/chosen": 0.7306254506111145, "rewards/margins": 0.5038386583328247, "rewards/rejected": 0.226786807179451, "step": 552 }, { "epoch": 1.0, "learning_rate": 7.776328990073796e-08, "logits/chosen": -2.2081665992736816, "logits/rejected": -2.227991819381714, "logps/chosen": -1.1245752573013306, "logps/rejected": -14.605779647827148, "loss": 0.7456, "rewards/accuracies": 0.0, "rewards/chosen": 0.37735500931739807, "rewards/margins": -0.4050535261631012, "rewards/rejected": 0.7824085354804993, "step": 553 }, { "epoch": 1.0, "learning_rate": 7.768204443880475e-08, "logits/chosen": -2.378422498703003, "logits/rejected": -2.343825340270996, "logps/chosen": -1.6461138725280762, "logps/rejected": 0.0, "loss": 0.5524, "rewards/accuracies": 1.0, "rewards/chosen": 0.7885798215866089, "rewards/margins": 0.7885798215866089, "rewards/rejected": 0.0, "step": 554 }, { "epoch": 1.0, "learning_rate": 7.760069344450418e-08, "logits/chosen": -2.3605847358703613, "logits/rejected": -2.384291410446167, "logps/chosen": -2.4122705459594727, "logps/rejected": -5.544581890106201, "loss": 0.713, "rewards/accuracies": 1.0, "rewards/chosen": 0.867859959602356, "rewards/margins": 0.05128622055053711, "rewards/rejected": 0.8165737390518188, "step": 555 }, { "epoch": 1.01, "learning_rate": 7.751923722797093e-08, "logits/chosen": -2.3519513607025146, "logits/rejected": -2.3564155101776123, "logps/chosen": -1.374355435371399, "logps/rejected": -2.5333330631256104, "loss": 0.6468, "rewards/accuracies": 1.0, "rewards/chosen": 0.6387953758239746, "rewards/margins": 0.05031687021255493, "rewards/rejected": 0.5884785056114197, "step": 556 }, { "epoch": 1.01, "learning_rate": 7.743767609974085e-08, "logits/chosen": -2.3158743381500244, "logits/rejected": -2.2485482692718506, "logps/chosen": -26.136459350585938, "logps/rejected": -6.828957557678223, "loss": 0.9078, "rewards/accuracies": 0.0, "rewards/chosen": -0.1395137757062912, "rewards/margins": -0.7488654255867004, "rewards/rejected": 0.609351634979248, "step": 557 }, { "epoch": 1.01, "learning_rate": 7.735601037074977e-08, "logits/chosen": -2.362790584564209, "logits/rejected": -2.370018720626831, "logps/chosen": -2.051486015319824, "logps/rejected": -2.6136960983276367, "loss": 0.6, "rewards/accuracies": 1.0, "rewards/chosen": 0.742527425289154, "rewards/margins": 0.1162557601928711, "rewards/rejected": 0.626271665096283, "step": 558 }, { "epoch": 1.01, "learning_rate": 7.727424035233228e-08, "logits/chosen": -2.3777546882629395, "logits/rejected": -2.370169162750244, "logps/chosen": -8.077042579650879, "logps/rejected": -2.3650386333465576, "loss": 0.4699, "rewards/accuracies": 1.0, "rewards/chosen": 0.7077970504760742, "rewards/margins": 0.12294149398803711, "rewards/rejected": 0.5848555564880371, "step": 559 }, { "epoch": 1.01, "learning_rate": 7.719236635622051e-08, "logits/chosen": -2.2630903720855713, "logits/rejected": -2.252797842025757, "logps/chosen": -1.9133180379867554, "logps/rejected": -4.545705318450928, "loss": 0.869, "rewards/accuracies": 0.0, "rewards/chosen": 0.6096507906913757, "rewards/margins": -0.2592318058013916, "rewards/rejected": 0.8688825964927673, "step": 560 }, { "epoch": 1.01, "learning_rate": 7.711038869454303e-08, "logits/chosen": -2.2490479946136475, "logits/rejected": -2.2479610443115234, "logps/chosen": -3.610133409500122, "logps/rejected": -2.8535008430480957, "loss": 0.66, "rewards/accuracies": 1.0, "rewards/chosen": 0.8165337443351746, "rewards/margins": 0.1760568618774414, "rewards/rejected": 0.6404768824577332, "step": 561 }, { "epoch": 1.02, "learning_rate": 7.702830767982362e-08, "logits/chosen": -2.261810302734375, "logits/rejected": -2.2583796977996826, "logps/chosen": -1.0143455266952515, "logps/rejected": -7.128414154052734, "loss": 0.643, "rewards/accuracies": 1.0, "rewards/chosen": 0.6423972249031067, "rewards/margins": 0.26078349351882935, "rewards/rejected": 0.38161373138427734, "step": 562 }, { "epoch": 1.02, "learning_rate": 7.694612362498005e-08, "logits/chosen": -2.177957534790039, "logits/rejected": -2.176330089569092, "logps/chosen": -5.733086109161377, "logps/rejected": -2.023385524749756, "loss": 0.5884, "rewards/accuracies": 1.0, "rewards/chosen": 0.7725511193275452, "rewards/margins": 0.040899574756622314, "rewards/rejected": 0.7316515445709229, "step": 563 }, { "epoch": 1.02, "learning_rate": 7.686383684332291e-08, "logits/chosen": -2.3142507076263428, "logits/rejected": -2.3893001079559326, "logps/chosen": -15.802885055541992, "logps/rejected": -40.66641616821289, "loss": 0.3744, "rewards/accuracies": 1.0, "rewards/chosen": 1.0398333072662354, "rewards/margins": 1.2252031564712524, "rewards/rejected": -0.18536987900733948, "step": 564 }, { "epoch": 1.02, "learning_rate": 7.678144764855442e-08, "logits/chosen": -2.412652015686035, "logits/rejected": -2.413640260696411, "logps/chosen": -3.446181535720825, "logps/rejected": -3.5052196979522705, "loss": 0.5613, "rewards/accuracies": 0.0, "rewards/chosen": 0.6041848063468933, "rewards/margins": -0.13059085607528687, "rewards/rejected": 0.7347756624221802, "step": 565 }, { "epoch": 1.02, "learning_rate": 7.669895635476725e-08, "logits/chosen": -2.327092170715332, "logits/rejected": -2.335275411605835, "logps/chosen": -5.151272296905518, "logps/rejected": -27.330780029296875, "loss": 0.5665, "rewards/accuracies": 1.0, "rewards/chosen": 0.6615878939628601, "rewards/margins": 0.2930900454521179, "rewards/rejected": 0.3684978485107422, "step": 566 }, { "epoch": 1.03, "learning_rate": 7.661636327644327e-08, "logits/chosen": -2.188722848892212, "logits/rejected": -2.18908953666687, "logps/chosen": -1.6822408437728882, "logps/rejected": -6.635098934173584, "loss": 0.6774, "rewards/accuracies": 1.0, "rewards/chosen": 0.5794954895973206, "rewards/margins": 0.2905358374118805, "rewards/rejected": 0.28895965218544006, "step": 567 }, { "epoch": 1.03, "learning_rate": 7.65336687284524e-08, "logits/chosen": -2.357489824295044, "logits/rejected": -2.332746744155884, "logps/chosen": -8.831694602966309, "logps/rejected": 0.0, "loss": 0.4617, "rewards/accuracies": 1.0, "rewards/chosen": 0.8880574107170105, "rewards/margins": 0.8880574107170105, "rewards/rejected": 0.0, "step": 568 }, { "epoch": 1.03, "learning_rate": 7.645087302605139e-08, "logits/chosen": -2.3320696353912354, "logits/rejected": -2.33603835105896, "logps/chosen": -2.061828136444092, "logps/rejected": -3.6108593940734863, "loss": 0.646, "rewards/accuracies": 1.0, "rewards/chosen": 0.6677238941192627, "rewards/margins": 0.083560049533844, "rewards/rejected": 0.5841638445854187, "step": 569 }, { "epoch": 1.03, "learning_rate": 7.636797648488265e-08, "logits/chosen": -2.397995710372925, "logits/rejected": -2.407367706298828, "logps/chosen": -0.885206937789917, "logps/rejected": -6.038269519805908, "loss": 0.6773, "rewards/accuracies": 1.0, "rewards/chosen": 0.7116902470588684, "rewards/margins": 0.18807578086853027, "rewards/rejected": 0.5236144661903381, "step": 570 }, { "epoch": 1.03, "learning_rate": 7.628497942097295e-08, "logits/chosen": -2.3132596015930176, "logits/rejected": -2.2825284004211426, "logps/chosen": -1.4022328853607178, "logps/rejected": 0.0, "loss": 0.3615, "rewards/accuracies": 1.0, "rewards/chosen": 0.7228358387947083, "rewards/margins": 0.7228358387947083, "rewards/rejected": 0.0, "step": 571 }, { "epoch": 1.03, "learning_rate": 7.620188215073235e-08, "logits/chosen": -2.4049012660980225, "logits/rejected": -2.406862258911133, "logps/chosen": -1.8300282955169678, "logps/rejected": -5.219421863555908, "loss": 0.6202, "rewards/accuracies": 1.0, "rewards/chosen": 0.8132455945014954, "rewards/margins": 0.31676602363586426, "rewards/rejected": 0.4964795708656311, "step": 572 }, { "epoch": 1.04, "learning_rate": 7.611868499095291e-08, "logits/chosen": -2.2209808826446533, "logits/rejected": -2.2392444610595703, "logps/chosen": -2.7661635875701904, "logps/rejected": -11.74026870727539, "loss": 0.7549, "rewards/accuracies": 0.0, "rewards/chosen": 0.6339907646179199, "rewards/margins": -0.154180645942688, "rewards/rejected": 0.7881714105606079, "step": 573 }, { "epoch": 1.04, "learning_rate": 7.603538825880746e-08, "logits/chosen": -2.358546257019043, "logits/rejected": -2.3570449352264404, "logps/chosen": -1.0850454568862915, "logps/rejected": -2.131683349609375, "loss": 0.766, "rewards/accuracies": 1.0, "rewards/chosen": 0.6278851628303528, "rewards/margins": 0.029965996742248535, "rewards/rejected": 0.5979191660881042, "step": 574 }, { "epoch": 1.04, "learning_rate": 7.59519922718485e-08, "logits/chosen": -2.3165252208709717, "logits/rejected": -2.321474075317383, "logps/chosen": -1.8513580560684204, "logps/rejected": -0.9528095722198486, "loss": 0.6059, "rewards/accuracies": 1.0, "rewards/chosen": 0.4769134521484375, "rewards/margins": 0.008643150329589844, "rewards/rejected": 0.46827030181884766, "step": 575 }, { "epoch": 1.04, "learning_rate": 7.586849734800683e-08, "logits/chosen": -2.4194626808166504, "logits/rejected": -2.4150445461273193, "logps/chosen": -1.5371904373168945, "logps/rejected": -20.730175018310547, "loss": 0.516, "rewards/accuracies": 1.0, "rewards/chosen": 0.655639111995697, "rewards/margins": 0.6297376751899719, "rewards/rejected": 0.025901412591338158, "step": 576 }, { "epoch": 1.04, "learning_rate": 7.578490380559053e-08, "logits/chosen": -2.34279465675354, "logits/rejected": -2.338749885559082, "logps/chosen": -3.6318233013153076, "logps/rejected": -4.308933734893799, "loss": 0.5344, "rewards/accuracies": 0.0, "rewards/chosen": 0.5753813982009888, "rewards/margins": -0.051234543323516846, "rewards/rejected": 0.6266159415245056, "step": 577 }, { "epoch": 1.05, "learning_rate": 7.570121196328356e-08, "logits/chosen": -2.4689667224884033, "logits/rejected": -2.4684879779815674, "logps/chosen": -0.5913172960281372, "logps/rejected": -1.4038172960281372, "loss": 0.6172, "rewards/accuracies": 1.0, "rewards/chosen": 0.24863019585609436, "rewards/margins": 0.33125001192092896, "rewards/rejected": -0.082619808614254, "step": 578 }, { "epoch": 1.05, "learning_rate": 7.561742214014469e-08, "logits/chosen": -2.213369131088257, "logits/rejected": -2.2199058532714844, "logps/chosen": -2.0993828773498535, "logps/rejected": -1.6397621631622314, "loss": 0.679, "rewards/accuracies": 1.0, "rewards/chosen": 0.8407022356987, "rewards/margins": 0.09307831525802612, "rewards/rejected": 0.7476239204406738, "step": 579 }, { "epoch": 1.05, "learning_rate": 7.553353465560615e-08, "logits/chosen": -2.356674909591675, "logits/rejected": -2.3317785263061523, "logps/chosen": -1.625584363937378, "logps/rejected": 0.0, "loss": 0.4801, "rewards/accuracies": 1.0, "rewards/chosen": 0.6643549799919128, "rewards/margins": 0.6643549799919128, "rewards/rejected": 0.0, "step": 580 }, { "epoch": 1.05, "learning_rate": 7.544954982947256e-08, "logits/chosen": -2.4059829711914062, "logits/rejected": -2.4108994007110596, "logps/chosen": -4.605207920074463, "logps/rejected": -1.7501298189163208, "loss": 0.7443, "rewards/accuracies": 0.0, "rewards/chosen": 0.5601035952568054, "rewards/margins": -0.1983155608177185, "rewards/rejected": 0.7584191560745239, "step": 581 }, { "epoch": 1.05, "learning_rate": 7.536546798191958e-08, "logits/chosen": -2.5848276615142822, "logits/rejected": -2.5479631423950195, "logps/chosen": -13.479477882385254, "logps/rejected": -2.6063427925109863, "loss": 0.7115, "rewards/accuracies": 0.0, "rewards/chosen": 0.6706847548484802, "rewards/margins": -0.04152560234069824, "rewards/rejected": 0.7122103571891785, "step": 582 }, { "epoch": 1.05, "learning_rate": 7.528128943349283e-08, "logits/chosen": -2.1886484622955322, "logits/rejected": -2.2695813179016113, "logps/chosen": -1.9194564819335938, "logps/rejected": -30.399398803710938, "loss": 0.8087, "rewards/accuracies": 1.0, "rewards/chosen": 0.5930012464523315, "rewards/margins": 0.07951360940933228, "rewards/rejected": 0.5134876370429993, "step": 583 }, { "epoch": 1.06, "learning_rate": 7.519701450510645e-08, "logits/chosen": -2.3216822147369385, "logits/rejected": -2.3208677768707275, "logps/chosen": -4.378451824188232, "logps/rejected": -8.545595169067383, "loss": 0.796, "rewards/accuracies": 0.0, "rewards/chosen": 0.7581337690353394, "rewards/margins": -0.37613534927368164, "rewards/rejected": 1.134269118309021, "step": 584 }, { "epoch": 1.06, "learning_rate": 7.511264351804212e-08, "logits/chosen": -2.328434705734253, "logits/rejected": -2.3374600410461426, "logps/chosen": -3.570511817932129, "logps/rejected": -2.0917625427246094, "loss": 0.6816, "rewards/accuracies": 1.0, "rewards/chosen": 0.9565038084983826, "rewards/margins": 0.04672396183013916, "rewards/rejected": 0.9097798466682434, "step": 585 }, { "epoch": 1.06, "learning_rate": 7.502817679394768e-08, "logits/chosen": -2.2505664825439453, "logits/rejected": -2.2595508098602295, "logps/chosen": -1.3851733207702637, "logps/rejected": -3.7095088958740234, "loss": 0.6635, "rewards/accuracies": 0.0, "rewards/chosen": 0.5405026078224182, "rewards/margins": -0.2134876847267151, "rewards/rejected": 0.7539902925491333, "step": 586 }, { "epoch": 1.06, "learning_rate": 7.494361465483595e-08, "logits/chosen": -2.366117477416992, "logits/rejected": -2.3559961318969727, "logps/chosen": -3.7867379188537598, "logps/rejected": -5.65886926651001, "loss": 0.8264, "rewards/accuracies": 0.0, "rewards/chosen": 0.7758306264877319, "rewards/margins": -0.18625396490097046, "rewards/rejected": 0.9620845913887024, "step": 587 }, { "epoch": 1.06, "learning_rate": 7.485895742308352e-08, "logits/chosen": -2.2768609523773193, "logits/rejected": -2.280789852142334, "logps/chosen": -0.9317535161972046, "logps/rejected": -2.707951784133911, "loss": 0.5713, "rewards/accuracies": 1.0, "rewards/chosen": 0.804338276386261, "rewards/margins": 0.2512868642807007, "rewards/rejected": 0.5530514121055603, "step": 588 }, { "epoch": 1.07, "learning_rate": 7.477420542142948e-08, "logits/chosen": -2.336700201034546, "logits/rejected": -2.339243173599243, "logps/chosen": -2.2229583263397217, "logps/rejected": -1.607183575630188, "loss": 0.5736, "rewards/accuracies": 1.0, "rewards/chosen": 0.5047680139541626, "rewards/margins": 0.013238102197647095, "rewards/rejected": 0.4915299117565155, "step": 589 }, { "epoch": 1.07, "learning_rate": 7.468935897297423e-08, "logits/chosen": -2.188953161239624, "logits/rejected": -2.186521530151367, "logps/chosen": -1.1837738752365112, "logps/rejected": -2.600780963897705, "loss": 0.6906, "rewards/accuracies": 0.0, "rewards/chosen": 0.445976585149765, "rewards/margins": -0.17103657126426697, "rewards/rejected": 0.617013156414032, "step": 590 }, { "epoch": 1.07, "learning_rate": 7.460441840117821e-08, "logits/chosen": -2.3559257984161377, "logits/rejected": -2.365100860595703, "logps/chosen": -3.1530346870422363, "logps/rejected": -3.01820707321167, "loss": 0.714, "rewards/accuracies": 1.0, "rewards/chosen": 0.7750311493873596, "rewards/margins": 0.2062433958053589, "rewards/rejected": 0.5687877535820007, "step": 591 }, { "epoch": 1.07, "learning_rate": 7.45193840298607e-08, "logits/chosen": -2.337984085083008, "logits/rejected": -2.342090368270874, "logps/chosen": -2.0592455863952637, "logps/rejected": -2.986750841140747, "loss": 0.4069, "rewards/accuracies": 1.0, "rewards/chosen": 0.930881142616272, "rewards/margins": 0.2866806387901306, "rewards/rejected": 0.6442005038261414, "step": 592 }, { "epoch": 1.07, "learning_rate": 7.443425618319857e-08, "logits/chosen": -2.350226879119873, "logits/rejected": -2.3212530612945557, "logps/chosen": -2.3822357654571533, "logps/rejected": 0.0, "loss": 0.5652, "rewards/accuracies": 1.0, "rewards/chosen": 0.7500521540641785, "rewards/margins": 0.7500521540641785, "rewards/rejected": 0.0, "step": 593 }, { "epoch": 1.07, "learning_rate": 7.434903518572504e-08, "logits/chosen": -2.254438638687134, "logits/rejected": -2.249553680419922, "logps/chosen": -5.267626762390137, "logps/rejected": -6.0810747146606445, "loss": 0.6215, "rewards/accuracies": 1.0, "rewards/chosen": 0.6767261624336243, "rewards/margins": 0.12676209211349487, "rewards/rejected": 0.5499640703201294, "step": 594 }, { "epoch": 1.08, "learning_rate": 7.426372136232847e-08, "logits/chosen": -2.350166082382202, "logits/rejected": -2.3500235080718994, "logps/chosen": -2.230346202850342, "logps/rejected": -11.859881401062012, "loss": 0.5089, "rewards/accuracies": 1.0, "rewards/chosen": 0.9824232459068298, "rewards/margins": 0.8460500836372375, "rewards/rejected": 0.1363731473684311, "step": 595 }, { "epoch": 1.08, "learning_rate": 7.417831503825108e-08, "logits/chosen": -2.3800082206726074, "logits/rejected": -2.3760986328125, "logps/chosen": -1.7533471584320068, "logps/rejected": -1.9133830070495605, "loss": 0.6197, "rewards/accuracies": 1.0, "rewards/chosen": 0.6480931639671326, "rewards/margins": 0.0796440839767456, "rewards/rejected": 0.568449079990387, "step": 596 }, { "epoch": 1.08, "learning_rate": 7.409281653908772e-08, "logits/chosen": -2.3681511878967285, "logits/rejected": -2.3554303646087646, "logps/chosen": -6.692915439605713, "logps/rejected": -3.4716832637786865, "loss": 0.6968, "rewards/accuracies": 0.0, "rewards/chosen": 0.6028881669044495, "rewards/margins": -0.2928928732872009, "rewards/rejected": 0.8957810401916504, "step": 597 }, { "epoch": 1.08, "learning_rate": 7.40072261907847e-08, "logits/chosen": -2.2755093574523926, "logits/rejected": -2.3034980297088623, "logps/chosen": 0.0, "logps/rejected": -6.21716833114624, "loss": 0.7459, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": -0.37253719568252563, "rewards/rejected": 0.37253719568252563, "step": 598 }, { "epoch": 1.08, "learning_rate": 7.39215443196384e-08, "logits/chosen": -2.34328293800354, "logits/rejected": -2.3166656494140625, "logps/chosen": -1.5900136232376099, "logps/rejected": 0.0, "loss": 0.5244, "rewards/accuracies": 1.0, "rewards/chosen": 0.8672232627868652, "rewards/margins": 0.8672232627868652, "rewards/rejected": 0.0, "step": 599 }, { "epoch": 1.08, "learning_rate": 7.383577125229417e-08, "logits/chosen": -2.376260757446289, "logits/rejected": -2.46966290473938, "logps/chosen": -2.1347081661224365, "logps/rejected": -29.821252822875977, "loss": 0.537, "rewards/accuracies": 1.0, "rewards/chosen": 0.4957168698310852, "rewards/margins": 0.5167946219444275, "rewards/rejected": -0.021077727898955345, "step": 600 }, { "epoch": 1.09, "learning_rate": 7.374990731574502e-08, "logits/chosen": -2.30863618850708, "logits/rejected": -2.3059935569763184, "logps/chosen": -2.1571083068847656, "logps/rejected": -6.2573370933532715, "loss": 0.5826, "rewards/accuracies": 1.0, "rewards/chosen": 0.6326784491539001, "rewards/margins": 0.2405601441860199, "rewards/rejected": 0.39211830496788025, "step": 601 }, { "epoch": 1.09, "learning_rate": 7.366395283733036e-08, "logits/chosen": -2.3835480213165283, "logits/rejected": -2.379000186920166, "logps/chosen": -1.8062214851379395, "logps/rejected": -3.0773613452911377, "loss": 0.6475, "rewards/accuracies": 1.0, "rewards/chosen": 0.6196149587631226, "rewards/margins": 0.18143394589424133, "rewards/rejected": 0.4381810128688812, "step": 602 }, { "epoch": 1.09, "learning_rate": 7.357790814473479e-08, "logits/chosen": -2.254878282546997, "logits/rejected": -2.2371773719787598, "logps/chosen": -16.891742706298828, "logps/rejected": -4.906455993652344, "loss": 0.8264, "rewards/accuracies": 0.0, "rewards/chosen": 0.2247287780046463, "rewards/margins": -0.5146579146385193, "rewards/rejected": 0.7393866777420044, "step": 603 }, { "epoch": 1.09, "learning_rate": 7.349177356598685e-08, "logits/chosen": -2.2014222145080566, "logits/rejected": -2.207923650741577, "logps/chosen": -2.667813301086426, "logps/rejected": -2.3240225315093994, "loss": 0.5796, "rewards/accuracies": 0.0, "rewards/chosen": 0.45760297775268555, "rewards/margins": -0.05223172903060913, "rewards/rejected": 0.5098347067832947, "step": 604 }, { "epoch": 1.09, "learning_rate": 7.340554942945773e-08, "logits/chosen": -2.18009614944458, "logits/rejected": -2.1812961101531982, "logps/chosen": -2.0152459144592285, "logps/rejected": -5.317528247833252, "loss": 0.8892, "rewards/accuracies": 0.0, "rewards/chosen": 0.4007888436317444, "rewards/margins": -0.45210176706314087, "rewards/rejected": 0.8528906106948853, "step": 605 }, { "epoch": 1.1, "learning_rate": 7.331923606386002e-08, "logits/chosen": -2.339047431945801, "logits/rejected": -2.3400654792785645, "logps/chosen": -2.8438315391540527, "logps/rejected": -2.2258856296539307, "loss": 0.7666, "rewards/accuracies": 0.0, "rewards/chosen": 0.7086806893348694, "rewards/margins": -0.10122829675674438, "rewards/rejected": 0.8099089860916138, "step": 606 }, { "epoch": 1.1, "learning_rate": 7.323283379824653e-08, "logits/chosen": -2.236382007598877, "logits/rejected": -2.256096601486206, "logps/chosen": -9.904858589172363, "logps/rejected": -8.46932315826416, "loss": 0.7597, "rewards/accuracies": 1.0, "rewards/chosen": 0.6696200370788574, "rewards/margins": 0.10015922784805298, "rewards/rejected": 0.5694608092308044, "step": 607 }, { "epoch": 1.1, "learning_rate": 7.314634296200896e-08, "logits/chosen": -2.2715325355529785, "logits/rejected": -2.259096622467041, "logps/chosen": -5.773568630218506, "logps/rejected": -7.167591571807861, "loss": 0.6608, "rewards/accuracies": 0.0, "rewards/chosen": 0.43608832359313965, "rewards/margins": -0.012099087238311768, "rewards/rejected": 0.4481874108314514, "step": 608 }, { "epoch": 1.1, "learning_rate": 7.305976388487664e-08, "logits/chosen": -2.274446487426758, "logits/rejected": -2.2703187465667725, "logps/chosen": -8.817968368530273, "logps/rejected": -1.6538194417953491, "loss": 0.6731, "rewards/accuracies": 1.0, "rewards/chosen": 0.8884220123291016, "rewards/margins": 0.0818490982055664, "rewards/rejected": 0.8065729141235352, "step": 609 }, { "epoch": 1.1, "learning_rate": 7.297309689691537e-08, "logits/chosen": -2.377408266067505, "logits/rejected": -2.397578001022339, "logps/chosen": 0.0, "logps/rejected": -1.7546969652175903, "loss": 0.768, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": -0.5793511271476746, "rewards/rejected": 0.5793511271476746, "step": 610 }, { "epoch": 1.1, "learning_rate": 7.288634232852602e-08, "logits/chosen": -2.304931879043579, "logits/rejected": -2.307368040084839, "logps/chosen": -1.7603105306625366, "logps/rejected": -2.5862553119659424, "loss": 0.478, "rewards/accuracies": 1.0, "rewards/chosen": 0.6463903188705444, "rewards/margins": 0.09497135877609253, "rewards/rejected": 0.5514189600944519, "step": 611 }, { "epoch": 1.11, "learning_rate": 7.279950051044336e-08, "logits/chosen": -2.487579107284546, "logits/rejected": -2.466923952102661, "logps/chosen": -24.684978485107422, "logps/rejected": -3.7915854454040527, "loss": 0.8027, "rewards/accuracies": 0.0, "rewards/chosen": 0.15960541367530823, "rewards/margins": -0.4215008318424225, "rewards/rejected": 0.5811062455177307, "step": 612 }, { "epoch": 1.11, "learning_rate": 7.271257177373485e-08, "logits/chosen": -2.389437198638916, "logits/rejected": -2.3875532150268555, "logps/chosen": -1.3277254104614258, "logps/rejected": -1.6764798164367676, "loss": 0.6716, "rewards/accuracies": 1.0, "rewards/chosen": 0.7784105539321899, "rewards/margins": 0.17332196235656738, "rewards/rejected": 0.6050885915756226, "step": 613 }, { "epoch": 1.11, "learning_rate": 7.262555644979921e-08, "logits/chosen": -2.328458309173584, "logits/rejected": -2.413329839706421, "logps/chosen": -1.495961308479309, "logps/rejected": -38.68779373168945, "loss": 0.5769, "rewards/accuracies": 1.0, "rewards/chosen": 0.6130082011222839, "rewards/margins": 1.153807520866394, "rewards/rejected": -0.5407993197441101, "step": 614 }, { "epoch": 1.11, "learning_rate": 7.253845487036532e-08, "logits/chosen": -2.2371809482574463, "logits/rejected": -2.234076738357544, "logps/chosen": -2.103323459625244, "logps/rejected": -1.7859926223754883, "loss": 0.5095, "rewards/accuracies": 1.0, "rewards/chosen": 0.7075986266136169, "rewards/margins": 0.26032862067222595, "rewards/rejected": 0.447270005941391, "step": 615 }, { "epoch": 1.11, "learning_rate": 7.245126736749088e-08, "logits/chosen": -2.4494071006774902, "logits/rejected": -2.449106216430664, "logps/chosen": -2.5392391681671143, "logps/rejected": -2.6884970664978027, "loss": 0.7511, "rewards/accuracies": 0.0, "rewards/chosen": 0.7316649556159973, "rewards/margins": -0.04904675483703613, "rewards/rejected": 0.7807117104530334, "step": 616 }, { "epoch": 1.12, "learning_rate": 7.236399427356114e-08, "logits/chosen": -2.4922118186950684, "logits/rejected": -2.4913463592529297, "logps/chosen": -9.058225631713867, "logps/rejected": -3.8116567134857178, "loss": 0.8536, "rewards/accuracies": 0.0, "rewards/chosen": 0.5454155206680298, "rewards/margins": -0.24535226821899414, "rewards/rejected": 0.7907677888870239, "step": 617 }, { "epoch": 1.12, "learning_rate": 7.227663592128766e-08, "logits/chosen": -2.3308703899383545, "logits/rejected": -2.3785462379455566, "logps/chosen": -2.4696297645568848, "logps/rejected": -20.99260139465332, "loss": 0.5351, "rewards/accuracies": 1.0, "rewards/chosen": 0.6273999810218811, "rewards/margins": 0.42650943994522095, "rewards/rejected": 0.20089054107666016, "step": 618 }, { "epoch": 1.12, "learning_rate": 7.218919264370704e-08, "logits/chosen": -2.3093461990356445, "logits/rejected": -2.3309805393218994, "logps/chosen": -8.194255828857422, "logps/rejected": -12.528703689575195, "loss": 0.6067, "rewards/accuracies": 0.0, "rewards/chosen": 0.5010181665420532, "rewards/margins": -0.042661845684051514, "rewards/rejected": 0.5436800122261047, "step": 619 }, { "epoch": 1.12, "learning_rate": 7.210166477417962e-08, "logits/chosen": -2.273120403289795, "logits/rejected": -2.339195966720581, "logps/chosen": -2.320453643798828, "logps/rejected": -36.25324249267578, "loss": 0.5825, "rewards/accuracies": 1.0, "rewards/chosen": 0.6522611975669861, "rewards/margins": 0.29049476981163025, "rewards/rejected": 0.36176642775535583, "step": 620 }, { "epoch": 1.12, "learning_rate": 7.201405264638827e-08, "logits/chosen": -2.2436611652374268, "logits/rejected": -2.2483816146850586, "logps/chosen": -2.708019733428955, "logps/rejected": -4.395004749298096, "loss": 0.7285, "rewards/accuracies": 0.0, "rewards/chosen": 0.8081507086753845, "rewards/margins": -0.06544226408004761, "rewards/rejected": 0.8735929727554321, "step": 621 }, { "epoch": 1.12, "learning_rate": 7.192635659433701e-08, "logits/chosen": -2.34080171585083, "logits/rejected": -2.3483500480651855, "logps/chosen": -3.061718463897705, "logps/rejected": -1.2384772300720215, "loss": 0.6287, "rewards/accuracies": 0.0, "rewards/chosen": 0.5370614528656006, "rewards/margins": -0.0018021464347839355, "rewards/rejected": 0.5388635993003845, "step": 622 }, { "epoch": 1.13, "learning_rate": 7.18385769523499e-08, "logits/chosen": -2.3234734535217285, "logits/rejected": -2.324209451675415, "logps/chosen": -1.9070312976837158, "logps/rejected": -1.5131645202636719, "loss": 0.7245, "rewards/accuracies": 1.0, "rewards/chosen": 0.5728595852851868, "rewards/margins": 0.08338823914527893, "rewards/rejected": 0.48947134613990784, "step": 623 }, { "epoch": 1.13, "learning_rate": 7.175071405506956e-08, "logits/chosen": -2.279108762741089, "logits/rejected": -2.276454210281372, "logps/chosen": -7.245517730712891, "logps/rejected": -4.707742691040039, "loss": 0.5064, "rewards/accuracies": 1.0, "rewards/chosen": 0.9043874740600586, "rewards/margins": 0.4780227541923523, "rewards/rejected": 0.4263647198677063, "step": 624 }, { "epoch": 1.13, "learning_rate": 7.166276823745608e-08, "logits/chosen": -2.475250720977783, "logits/rejected": -2.48332142829895, "logps/chosen": -1.6928452253341675, "logps/rejected": -8.81273365020752, "loss": 0.7724, "rewards/accuracies": 1.0, "rewards/chosen": 0.5764254927635193, "rewards/margins": 0.01790851354598999, "rewards/rejected": 0.5585169792175293, "step": 625 }, { "epoch": 1.13, "learning_rate": 7.157473983478568e-08, "logits/chosen": -2.3598101139068604, "logits/rejected": -2.3678009510040283, "logps/chosen": -1.4338830709457397, "logps/rejected": -3.874056339263916, "loss": 0.6549, "rewards/accuracies": 1.0, "rewards/chosen": 0.5943203568458557, "rewards/margins": 0.1593766212463379, "rewards/rejected": 0.4349437355995178, "step": 626 }, { "epoch": 1.13, "learning_rate": 7.148662918264936e-08, "logits/chosen": -2.350565195083618, "logits/rejected": -2.3471059799194336, "logps/chosen": -6.949605464935303, "logps/rejected": -8.632292747497559, "loss": 0.831, "rewards/accuracies": 1.0, "rewards/chosen": 0.9841882586479187, "rewards/margins": 0.5681661367416382, "rewards/rejected": 0.4160221219062805, "step": 627 }, { "epoch": 1.14, "learning_rate": 7.139843661695169e-08, "logits/chosen": -2.3261098861694336, "logits/rejected": -2.306702136993408, "logps/chosen": -10.191503524780273, "logps/rejected": -2.2344789505004883, "loss": 0.7775, "rewards/accuracies": 1.0, "rewards/chosen": 0.9089617133140564, "rewards/margins": 0.36356472969055176, "rewards/rejected": 0.5453969836235046, "step": 628 }, { "epoch": 1.14, "learning_rate": 7.131016247390956e-08, "logits/chosen": -2.29776930809021, "logits/rejected": -2.3184263706207275, "logps/chosen": -8.11691665649414, "logps/rejected": -23.803499221801758, "loss": 0.5639, "rewards/accuracies": 1.0, "rewards/chosen": 0.8079061508178711, "rewards/margins": 0.48288649320602417, "rewards/rejected": 0.3250196576118469, "step": 629 }, { "epoch": 1.14, "learning_rate": 7.122180709005083e-08, "logits/chosen": -2.200848340988159, "logits/rejected": -2.19543194770813, "logps/chosen": -1.786170244216919, "logps/rejected": -3.8833532333374023, "loss": 0.7109, "rewards/accuracies": 1.0, "rewards/chosen": 0.4891989827156067, "rewards/margins": 0.06967392563819885, "rewards/rejected": 0.41952505707740784, "step": 630 }, { "epoch": 1.14, "learning_rate": 7.113337080221308e-08, "logits/chosen": -2.332571506500244, "logits/rejected": -2.340301036834717, "logps/chosen": -2.419447898864746, "logps/rejected": -2.1717946529388428, "loss": 0.6317, "rewards/accuracies": 1.0, "rewards/chosen": 0.7434704899787903, "rewards/margins": 0.262928307056427, "rewards/rejected": 0.4805421829223633, "step": 631 }, { "epoch": 1.14, "learning_rate": 7.104485394754231e-08, "logits/chosen": -2.4065423011779785, "logits/rejected": -2.528017997741699, "logps/chosen": -2.2589147090911865, "logps/rejected": -26.89713478088379, "loss": 0.4384, "rewards/accuracies": 1.0, "rewards/chosen": 0.6471466422080994, "rewards/margins": 0.8827612400054932, "rewards/rejected": -0.2356145828962326, "step": 632 }, { "epoch": 1.14, "learning_rate": 7.095625686349169e-08, "logits/chosen": -2.4129931926727295, "logits/rejected": -2.3861427307128906, "logps/chosen": -1.7175637483596802, "logps/rejected": 0.0, "loss": 0.6193, "rewards/accuracies": 1.0, "rewards/chosen": 0.5696407556533813, "rewards/margins": 0.5696407556533813, "rewards/rejected": 0.0, "step": 633 }, { "epoch": 1.15, "learning_rate": 7.086757988782022e-08, "logits/chosen": -2.28462815284729, "logits/rejected": -2.38063645362854, "logps/chosen": -2.534888982772827, "logps/rejected": -28.210359573364258, "loss": 0.763, "rewards/accuracies": 0.0, "rewards/chosen": 0.5285000205039978, "rewards/margins": -0.20916688442230225, "rewards/rejected": 0.7376669049263, "step": 634 }, { "epoch": 1.15, "learning_rate": 7.07788233585915e-08, "logits/chosen": -2.315833568572998, "logits/rejected": -2.32421612739563, "logps/chosen": -4.7484941482543945, "logps/rejected": -2.417600631713867, "loss": 0.7047, "rewards/accuracies": 1.0, "rewards/chosen": 0.6467009782791138, "rewards/margins": 0.18352442979812622, "rewards/rejected": 0.46317654848098755, "step": 635 }, { "epoch": 1.15, "learning_rate": 7.068998761417238e-08, "logits/chosen": -2.166008472442627, "logits/rejected": -2.165442705154419, "logps/chosen": -1.8656909465789795, "logps/rejected": -2.377018690109253, "loss": 0.5334, "rewards/accuracies": 1.0, "rewards/chosen": 0.7472133040428162, "rewards/margins": 0.2851574122905731, "rewards/rejected": 0.46205589175224304, "step": 636 }, { "epoch": 1.15, "learning_rate": 7.060107299323173e-08, "logits/chosen": -2.3004419803619385, "logits/rejected": -2.304725170135498, "logps/chosen": -2.956712245941162, "logps/rejected": -8.917631149291992, "loss": 0.7091, "rewards/accuracies": 0.0, "rewards/chosen": 0.7305750250816345, "rewards/margins": -0.10993880033493042, "rewards/rejected": 0.8405138254165649, "step": 637 }, { "epoch": 1.15, "learning_rate": 7.05120798347391e-08, "logits/chosen": -2.293869733810425, "logits/rejected": -2.3602733612060547, "logps/chosen": -2.083620548248291, "logps/rejected": -21.407691955566406, "loss": 0.4387, "rewards/accuracies": 1.0, "rewards/chosen": 0.5249027013778687, "rewards/margins": 0.18266215920448303, "rewards/rejected": 0.3422405421733856, "step": 638 }, { "epoch": 1.16, "learning_rate": 7.042300847796348e-08, "logits/chosen": -2.3705527782440186, "logits/rejected": -2.368939161300659, "logps/chosen": -2.65944766998291, "logps/rejected": -2.1933093070983887, "loss": 0.5937, "rewards/accuracies": 1.0, "rewards/chosen": 0.7457301020622253, "rewards/margins": 0.08574002981185913, "rewards/rejected": 0.6599900722503662, "step": 639 }, { "epoch": 1.16, "learning_rate": 7.033385926247194e-08, "logits/chosen": -2.3800933361053467, "logits/rejected": -2.375591516494751, "logps/chosen": -3.349907159805298, "logps/rejected": -12.540343284606934, "loss": 0.4977, "rewards/accuracies": 1.0, "rewards/chosen": 0.674100399017334, "rewards/margins": 0.23485201597213745, "rewards/rejected": 0.43924838304519653, "step": 640 }, { "epoch": 1.16, "learning_rate": 7.02446325281284e-08, "logits/chosen": -2.415139675140381, "logits/rejected": -2.3307199478149414, "logps/chosen": -14.27568244934082, "logps/rejected": -8.504617691040039, "loss": 0.6927, "rewards/accuracies": 1.0, "rewards/chosen": 0.7126335501670837, "rewards/margins": 0.1711266040802002, "rewards/rejected": 0.5415069460868835, "step": 641 }, { "epoch": 1.16, "learning_rate": 7.015532861509227e-08, "logits/chosen": -2.3201863765716553, "logits/rejected": -2.3213932514190674, "logps/chosen": -2.2705180644989014, "logps/rejected": -3.2238526344299316, "loss": 0.6036, "rewards/accuracies": 0.0, "rewards/chosen": 0.5605312585830688, "rewards/margins": -0.05502486228942871, "rewards/rejected": 0.6155561208724976, "step": 642 }, { "epoch": 1.16, "learning_rate": 7.006594786381722e-08, "logits/chosen": -2.264336109161377, "logits/rejected": -2.265266180038452, "logps/chosen": -2.205169916152954, "logps/rejected": -1.461569905281067, "loss": 0.6791, "rewards/accuracies": 0.0, "rewards/chosen": 0.5240713357925415, "rewards/margins": -0.00083160400390625, "rewards/rejected": 0.5249029397964478, "step": 643 }, { "epoch": 1.16, "learning_rate": 6.997649061504985e-08, "logits/chosen": -2.3418290615081787, "logits/rejected": -2.3386707305908203, "logps/chosen": -4.214268207550049, "logps/rejected": -2.7149839401245117, "loss": 0.475, "rewards/accuracies": 1.0, "rewards/chosen": 0.7007550597190857, "rewards/margins": 0.317006915807724, "rewards/rejected": 0.3837481439113617, "step": 644 }, { "epoch": 1.17, "learning_rate": 6.988695720982836e-08, "logits/chosen": -2.3117570877075195, "logits/rejected": -2.310750722885132, "logps/chosen": -13.711877822875977, "logps/rejected": -3.1004209518432617, "loss": 0.6021, "rewards/accuracies": 1.0, "rewards/chosen": 0.9129797220230103, "rewards/margins": 0.25103044509887695, "rewards/rejected": 0.6619492769241333, "step": 645 }, { "epoch": 1.17, "learning_rate": 6.979734798948134e-08, "logits/chosen": -2.362985849380493, "logits/rejected": -2.3626270294189453, "logps/chosen": -3.231691360473633, "logps/rejected": -1.846085786819458, "loss": 0.6652, "rewards/accuracies": 1.0, "rewards/chosen": 0.5561057925224304, "rewards/margins": 0.03300142288208008, "rewards/rejected": 0.5231043696403503, "step": 646 }, { "epoch": 1.17, "learning_rate": 6.970766329562634e-08, "logits/chosen": -2.3419132232666016, "logits/rejected": -2.363030433654785, "logps/chosen": -2.488046407699585, "logps/rejected": -23.797555923461914, "loss": 0.5825, "rewards/accuracies": 1.0, "rewards/chosen": 0.5775402188301086, "rewards/margins": 0.2005615234375, "rewards/rejected": 0.37697869539260864, "step": 647 }, { "epoch": 1.17, "learning_rate": 6.961790347016872e-08, "logits/chosen": -2.2738242149353027, "logits/rejected": -2.2740962505340576, "logps/chosen": -1.0029444694519043, "logps/rejected": -8.392398834228516, "loss": 0.7633, "rewards/accuracies": 0.0, "rewards/chosen": 0.5214161276817322, "rewards/margins": -0.4248233437538147, "rewards/rejected": 0.9462394714355469, "step": 648 }, { "epoch": 1.17, "learning_rate": 6.952806885530014e-08, "logits/chosen": -2.2610108852386475, "logits/rejected": -2.260138511657715, "logps/chosen": -0.8814277052879333, "logps/rejected": -1.847159743309021, "loss": 0.6946, "rewards/accuracies": 1.0, "rewards/chosen": 0.6473578214645386, "rewards/margins": 0.16175585985183716, "rewards/rejected": 0.4856019616127014, "step": 649 }, { "epoch": 1.18, "learning_rate": 6.943815979349754e-08, "logits/chosen": -2.2883737087249756, "logits/rejected": -2.284390449523926, "logps/chosen": -3.301347255706787, "logps/rejected": -3.1889781951904297, "loss": 0.6046, "rewards/accuracies": 1.0, "rewards/chosen": 0.6806620955467224, "rewards/margins": 0.05046391487121582, "rewards/rejected": 0.6301981806755066, "step": 650 }, { "epoch": 1.18, "learning_rate": 6.934817662752152e-08, "logits/chosen": -2.46585750579834, "logits/rejected": -2.4026296138763428, "logps/chosen": -20.10894012451172, "logps/rejected": -3.073922872543335, "loss": 0.8531, "rewards/accuracies": 0.0, "rewards/chosen": -0.024915123358368874, "rewards/margins": -0.6324926018714905, "rewards/rejected": 0.6075775027275085, "step": 651 }, { "epoch": 1.18, "learning_rate": 6.925811970041533e-08, "logits/chosen": -2.225874900817871, "logits/rejected": -2.2302844524383545, "logps/chosen": -5.240023612976074, "logps/rejected": -1.8409558534622192, "loss": 0.5751, "rewards/accuracies": 1.0, "rewards/chosen": 0.6512095332145691, "rewards/margins": 0.10959166288375854, "rewards/rejected": 0.5416178703308105, "step": 652 }, { "epoch": 1.18, "learning_rate": 6.916798935550329e-08, "logits/chosen": -2.33703875541687, "logits/rejected": -2.312645673751831, "logps/chosen": -7.390085220336914, "logps/rejected": 0.0, "loss": 0.5718, "rewards/accuracies": 1.0, "rewards/chosen": 1.0629485845565796, "rewards/margins": 1.0629485845565796, "rewards/rejected": 0.0, "step": 653 }, { "epoch": 1.18, "learning_rate": 6.907778593638971e-08, "logits/chosen": -2.2463691234588623, "logits/rejected": -2.2948386669158936, "logps/chosen": -1.074804425239563, "logps/rejected": -37.58661651611328, "loss": 0.693, "rewards/accuracies": 1.0, "rewards/chosen": 0.5005990266799927, "rewards/margins": 0.000537574291229248, "rewards/rejected": 0.5000614523887634, "step": 654 }, { "epoch": 1.18, "learning_rate": 6.898750978695741e-08, "logits/chosen": -2.38311767578125, "logits/rejected": -2.388274669647217, "logps/chosen": -2.241037368774414, "logps/rejected": -1.6941479444503784, "loss": 0.5905, "rewards/accuracies": 1.0, "rewards/chosen": 0.6170100569725037, "rewards/margins": 0.17809602618217468, "rewards/rejected": 0.438914030790329, "step": 655 }, { "epoch": 1.19, "learning_rate": 6.889716125136652e-08, "logits/chosen": -2.224196672439575, "logits/rejected": -2.2432587146759033, "logps/chosen": -2.096280813217163, "logps/rejected": -27.272701263427734, "loss": 0.5342, "rewards/accuracies": 1.0, "rewards/chosen": 0.5236601829528809, "rewards/margins": 0.2253323495388031, "rewards/rejected": 0.29832783341407776, "step": 656 }, { "epoch": 1.19, "learning_rate": 6.880674067405315e-08, "logits/chosen": -2.351638078689575, "logits/rejected": -2.4190540313720703, "logps/chosen": -4.785324573516846, "logps/rejected": -25.20694923400879, "loss": 0.6943, "rewards/accuracies": 0.0, "rewards/chosen": 0.811156690120697, "rewards/margins": -0.08798098564147949, "rewards/rejected": 0.8991376757621765, "step": 657 }, { "epoch": 1.19, "learning_rate": 6.871624839972797e-08, "logits/chosen": -2.289445161819458, "logits/rejected": -2.3131821155548096, "logps/chosen": 0.0, "logps/rejected": -2.9209015369415283, "loss": 1.0233, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": -0.9854508638381958, "rewards/rejected": 0.9854508638381958, "step": 658 }, { "epoch": 1.19, "learning_rate": 6.862568477337507e-08, "logits/chosen": -2.284611701965332, "logits/rejected": -2.2863852977752686, "logps/chosen": -5.274142742156982, "logps/rejected": -3.5753231048583984, "loss": 0.6875, "rewards/accuracies": 1.0, "rewards/chosen": 0.7342369556427002, "rewards/margins": 0.20009416341781616, "rewards/rejected": 0.534142792224884, "step": 659 }, { "epoch": 1.19, "learning_rate": 6.853505014025052e-08, "logits/chosen": -2.2569010257720947, "logits/rejected": -2.3588204383850098, "logps/chosen": -3.400778293609619, "logps/rejected": -31.679882049560547, "loss": 0.5559, "rewards/accuracies": 1.0, "rewards/chosen": 0.5542659163475037, "rewards/margins": 0.5939046144485474, "rewards/rejected": -0.0396387092769146, "step": 660 }, { "epoch": 1.2, "learning_rate": 6.844434484588108e-08, "logits/chosen": -2.2360870838165283, "logits/rejected": -2.220365047454834, "logps/chosen": -6.557814598083496, "logps/rejected": 0.0, "loss": 0.5248, "rewards/accuracies": 1.0, "rewards/chosen": 0.9011710286140442, "rewards/margins": 0.9011710286140442, "rewards/rejected": 0.0, "step": 661 }, { "epoch": 1.2, "learning_rate": 6.83535692360629e-08, "logits/chosen": -2.35782790184021, "logits/rejected": -2.3592026233673096, "logps/chosen": -2.4794795513153076, "logps/rejected": -4.502718448638916, "loss": 0.5632, "rewards/accuracies": 1.0, "rewards/chosen": 0.8354043960571289, "rewards/margins": 0.06459164619445801, "rewards/rejected": 0.7708127498626709, "step": 662 }, { "epoch": 1.2, "learning_rate": 6.826272365686022e-08, "logits/chosen": -2.2524046897888184, "logits/rejected": -2.25959849357605, "logps/chosen": -4.7982869148254395, "logps/rejected": -2.680307149887085, "loss": 0.66, "rewards/accuracies": 1.0, "rewards/chosen": 0.6763784289360046, "rewards/margins": 0.1343727707862854, "rewards/rejected": 0.5420056581497192, "step": 663 }, { "epoch": 1.2, "learning_rate": 6.817180845460397e-08, "logits/chosen": -2.286156177520752, "logits/rejected": -2.2866692543029785, "logps/chosen": -3.2542426586151123, "logps/rejected": -3.3978190422058105, "loss": 0.5294, "rewards/accuracies": 1.0, "rewards/chosen": 0.9088993072509766, "rewards/margins": 0.2962053418159485, "rewards/rejected": 0.6126939654350281, "step": 664 }, { "epoch": 1.2, "learning_rate": 6.808082397589054e-08, "logits/chosen": -2.2575764656066895, "logits/rejected": -2.2506754398345947, "logps/chosen": -6.21886682510376, "logps/rejected": -2.8562300205230713, "loss": 0.5086, "rewards/accuracies": 1.0, "rewards/chosen": 0.704189658164978, "rewards/margins": 0.15494507551193237, "rewards/rejected": 0.5492445826530457, "step": 665 }, { "epoch": 1.2, "learning_rate": 6.798977056758043e-08, "logits/chosen": -2.2436838150024414, "logits/rejected": -2.245473861694336, "logps/chosen": -6.685429573059082, "logps/rejected": -5.365519046783447, "loss": 0.5906, "rewards/accuracies": 0.0, "rewards/chosen": 0.4790847897529602, "rewards/margins": -0.030777037143707275, "rewards/rejected": 0.5098618268966675, "step": 666 }, { "epoch": 1.21, "learning_rate": 6.789864857679693e-08, "logits/chosen": -2.27077317237854, "logits/rejected": -2.2851064205169678, "logps/chosen": -1.7751680612564087, "logps/rejected": -8.704720497131348, "loss": 0.5377, "rewards/accuracies": 1.0, "rewards/chosen": 0.7678596377372742, "rewards/margins": 0.18903732299804688, "rewards/rejected": 0.5788223147392273, "step": 667 }, { "epoch": 1.21, "learning_rate": 6.780745835092469e-08, "logits/chosen": -2.3585968017578125, "logits/rejected": -2.3652522563934326, "logps/chosen": -3.142436981201172, "logps/rejected": -3.3903024196624756, "loss": 0.6281, "rewards/accuracies": 1.0, "rewards/chosen": 0.5701245665550232, "rewards/margins": 0.1976681649684906, "rewards/rejected": 0.3724564015865326, "step": 668 }, { "epoch": 1.21, "learning_rate": 6.771620023760866e-08, "logits/chosen": -2.186006784439087, "logits/rejected": -2.3442318439483643, "logps/chosen": -2.2392804622650146, "logps/rejected": -22.916790008544922, "loss": 0.6613, "rewards/accuracies": 1.0, "rewards/chosen": 0.8939052820205688, "rewards/margins": 0.7332397699356079, "rewards/rejected": 0.16066551208496094, "step": 669 }, { "epoch": 1.21, "learning_rate": 6.762487458475246e-08, "logits/chosen": -2.2418012619018555, "logits/rejected": -2.243100643157959, "logps/chosen": -2.71308970451355, "logps/rejected": -11.165230751037598, "loss": 0.6823, "rewards/accuracies": 0.0, "rewards/chosen": 0.7048296928405762, "rewards/margins": -0.11952060461044312, "rewards/rejected": 0.8243502974510193, "step": 670 }, { "epoch": 1.21, "learning_rate": 6.75334817405172e-08, "logits/chosen": -2.3113112449645996, "logits/rejected": -2.402963638305664, "logps/chosen": -2.1422853469848633, "logps/rejected": -28.10938262939453, "loss": 0.6737, "rewards/accuracies": 1.0, "rewards/chosen": 0.6610230803489685, "rewards/margins": 0.13979125022888184, "rewards/rejected": 0.5212318301200867, "step": 671 }, { "epoch": 1.22, "learning_rate": 6.744202205332026e-08, "logits/chosen": -2.3202927112579346, "logits/rejected": -2.3188748359680176, "logps/chosen": -1.88463294506073, "logps/rejected": -3.4207396507263184, "loss": 0.6809, "rewards/accuracies": 0.0, "rewards/chosen": 0.5063854455947876, "rewards/margins": -0.1582382321357727, "rewards/rejected": 0.6646236777305603, "step": 672 }, { "epoch": 1.22, "learning_rate": 6.735049587183371e-08, "logits/chosen": -2.3214035034179688, "logits/rejected": -2.3231770992279053, "logps/chosen": -2.5822958946228027, "logps/rejected": -2.4587724208831787, "loss": 0.6153, "rewards/accuracies": 0.0, "rewards/chosen": 0.6491213440895081, "rewards/margins": -0.07969069480895996, "rewards/rejected": 0.728812038898468, "step": 673 }, { "epoch": 1.22, "learning_rate": 6.725890354498321e-08, "logits/chosen": -2.339611768722534, "logits/rejected": -2.3420281410217285, "logps/chosen": -1.7046903371810913, "logps/rejected": -2.5970780849456787, "loss": 0.6795, "rewards/accuracies": 1.0, "rewards/chosen": 0.5735704302787781, "rewards/margins": 0.06635797023773193, "rewards/rejected": 0.5072124600410461, "step": 674 }, { "epoch": 1.22, "learning_rate": 6.716724542194652e-08, "logits/chosen": -2.2353639602661133, "logits/rejected": -2.273465633392334, "logps/chosen": -1.5742253065109253, "logps/rejected": -27.692520141601562, "loss": 0.8778, "rewards/accuracies": 1.0, "rewards/chosen": 0.8076247572898865, "rewards/margins": 0.32081979513168335, "rewards/rejected": 0.4868049621582031, "step": 675 }, { "epoch": 1.22, "learning_rate": 6.707552185215228e-08, "logits/chosen": -2.3160526752471924, "logits/rejected": -2.2971949577331543, "logps/chosen": -7.614312171936035, "logps/rejected": 0.0, "loss": 0.6034, "rewards/accuracies": 1.0, "rewards/chosen": 0.4690817892551422, "rewards/margins": 0.4690817892551422, "rewards/rejected": 0.0, "step": 676 }, { "epoch": 1.22, "learning_rate": 6.69837331852786e-08, "logits/chosen": -2.3579115867614746, "logits/rejected": -2.3573734760284424, "logps/chosen": -1.6755588054656982, "logps/rejected": -1.472092866897583, "loss": 0.7031, "rewards/accuracies": 0.0, "rewards/chosen": 0.39989012479782104, "rewards/margins": -0.03615179657936096, "rewards/rejected": 0.436041921377182, "step": 677 }, { "epoch": 1.23, "learning_rate": 6.689187977125181e-08, "logits/chosen": -2.49397873878479, "logits/rejected": -2.495629072189331, "logps/chosen": -2.338352680206299, "logps/rejected": -11.241415977478027, "loss": 0.634, "rewards/accuracies": 1.0, "rewards/chosen": 0.7941697835922241, "rewards/margins": 0.4306834936141968, "rewards/rejected": 0.36348628997802734, "step": 678 }, { "epoch": 1.23, "learning_rate": 6.6799961960245e-08, "logits/chosen": -2.2808399200439453, "logits/rejected": -2.2524585723876953, "logps/chosen": -3.787324905395508, "logps/rejected": -3.185654640197754, "loss": 0.6835, "rewards/accuracies": 1.0, "rewards/chosen": 0.8311856389045715, "rewards/margins": 0.15849000215530396, "rewards/rejected": 0.6726956367492676, "step": 679 }, { "epoch": 1.23, "learning_rate": 6.670798010267684e-08, "logits/chosen": -2.276186466217041, "logits/rejected": -2.3378024101257324, "logps/chosen": -2.25101900100708, "logps/rejected": -28.862817764282227, "loss": 0.735, "rewards/accuracies": 1.0, "rewards/chosen": 0.6198192238807678, "rewards/margins": 0.291278600692749, "rewards/rejected": 0.3285406231880188, "step": 680 }, { "epoch": 1.23, "learning_rate": 6.661593454921013e-08, "logits/chosen": -2.170543670654297, "logits/rejected": -2.174734115600586, "logps/chosen": -2.4540374279022217, "logps/rejected": -6.625550746917725, "loss": 0.7476, "rewards/accuracies": 0.0, "rewards/chosen": 0.7779546976089478, "rewards/margins": -0.34271204471588135, "rewards/rejected": 1.120666742324829, "step": 681 }, { "epoch": 1.23, "learning_rate": 6.652382565075047e-08, "logits/chosen": -2.3432648181915283, "logits/rejected": -2.3527824878692627, "logps/chosen": -1.9053902626037598, "logps/rejected": -3.30798077583313, "loss": 0.6635, "rewards/accuracies": 0.0, "rewards/chosen": 0.5434078574180603, "rewards/margins": -0.22648435831069946, "rewards/rejected": 0.7698922157287598, "step": 682 }, { "epoch": 1.24, "learning_rate": 6.6431653758445e-08, "logits/chosen": -2.285541534423828, "logits/rejected": -2.3011398315429688, "logps/chosen": -3.912698984146118, "logps/rejected": -19.10348129272461, "loss": 0.5738, "rewards/accuracies": 1.0, "rewards/chosen": 0.7089830636978149, "rewards/margins": 0.2906744182109833, "rewards/rejected": 0.41830864548683167, "step": 683 }, { "epoch": 1.24, "learning_rate": 6.633941922368099e-08, "logits/chosen": -2.37795090675354, "logits/rejected": -2.3519816398620605, "logps/chosen": -2.7564537525177, "logps/rejected": 0.0, "loss": 0.5434, "rewards/accuracies": 1.0, "rewards/chosen": 0.7292971611022949, "rewards/margins": 0.7292971611022949, "rewards/rejected": 0.0, "step": 684 }, { "epoch": 1.24, "learning_rate": 6.62471223980845e-08, "logits/chosen": -2.411252737045288, "logits/rejected": -2.37780499458313, "logps/chosen": -12.906818389892578, "logps/rejected": 0.0, "loss": 0.483, "rewards/accuracies": 1.0, "rewards/chosen": 0.879689633846283, "rewards/margins": 0.879689633846283, "rewards/rejected": 0.0, "step": 685 }, { "epoch": 1.24, "learning_rate": 6.615476363351909e-08, "logits/chosen": -2.415970802307129, "logits/rejected": -2.4204142093658447, "logps/chosen": -1.726979374885559, "logps/rejected": -3.61690616607666, "loss": 1.0257, "rewards/accuracies": 0.0, "rewards/chosen": 0.7060108184814453, "rewards/margins": -0.17067939043045044, "rewards/rejected": 0.8766902089118958, "step": 686 }, { "epoch": 1.24, "learning_rate": 6.606234328208446e-08, "logits/chosen": -2.2420077323913574, "logits/rejected": -2.2503762245178223, "logps/chosen": -6.019117832183838, "logps/rejected": -2.351428508758545, "loss": 0.6377, "rewards/accuracies": 0.0, "rewards/chosen": 0.5115386843681335, "rewards/margins": -0.031578123569488525, "rewards/rejected": 0.5431168079376221, "step": 687 }, { "epoch": 1.24, "learning_rate": 6.596986169611505e-08, "logits/chosen": -2.3793129920959473, "logits/rejected": -2.3786094188690186, "logps/chosen": -2.0059075355529785, "logps/rejected": -6.007228374481201, "loss": 0.7927, "rewards/accuracies": 0.0, "rewards/chosen": 0.4232642650604248, "rewards/margins": -0.421093225479126, "rewards/rejected": 0.8443574905395508, "step": 688 }, { "epoch": 1.25, "learning_rate": 6.587731922817881e-08, "logits/chosen": -2.4100561141967773, "logits/rejected": -2.4185755252838135, "logps/chosen": -2.0011696815490723, "logps/rejected": -4.044284820556641, "loss": 0.6557, "rewards/accuracies": 0.0, "rewards/chosen": 0.6609025597572327, "rewards/margins": -0.08038830757141113, "rewards/rejected": 0.7412908673286438, "step": 689 }, { "epoch": 1.25, "learning_rate": 6.578471623107573e-08, "logits/chosen": -2.3801827430725098, "logits/rejected": -2.39993953704834, "logps/chosen": 0.0, "logps/rejected": -3.7481961250305176, "loss": 0.8737, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": -0.7619299292564392, "rewards/rejected": 0.7619299292564392, "step": 690 }, { "epoch": 1.25, "learning_rate": 6.569205305783661e-08, "logits/chosen": -2.304419994354248, "logits/rejected": -2.4021248817443848, "logps/chosen": -2.2690916061401367, "logps/rejected": -22.435060501098633, "loss": 0.5809, "rewards/accuracies": 1.0, "rewards/chosen": 0.5842993855476379, "rewards/margins": 0.3718493580818176, "rewards/rejected": 0.2124500274658203, "step": 691 }, { "epoch": 1.25, "learning_rate": 6.559933006172162e-08, "logits/chosen": -2.3198957443237305, "logits/rejected": -2.325810670852661, "logps/chosen": -2.059715986251831, "logps/rejected": -11.721037864685059, "loss": 0.5161, "rewards/accuracies": 1.0, "rewards/chosen": 0.7762616276741028, "rewards/margins": 0.3215797543525696, "rewards/rejected": 0.4546818733215332, "step": 692 }, { "epoch": 1.25, "learning_rate": 6.550654759621899e-08, "logits/chosen": -2.4718916416168213, "logits/rejected": -2.451106309890747, "logps/chosen": -9.17906665802002, "logps/rejected": -2.059447765350342, "loss": 0.6153, "rewards/accuracies": 1.0, "rewards/chosen": 0.8656603693962097, "rewards/margins": 0.34019607305526733, "rewards/rejected": 0.5254642963409424, "step": 693 }, { "epoch": 1.25, "learning_rate": 6.541370601504367e-08, "logits/chosen": -2.3449838161468506, "logits/rejected": -2.3421199321746826, "logps/chosen": -2.0685698986053467, "logps/rejected": -6.695443153381348, "loss": 0.6875, "rewards/accuracies": 0.0, "rewards/chosen": 0.5850507020950317, "rewards/margins": -0.23201853036880493, "rewards/rejected": 0.8170692324638367, "step": 694 }, { "epoch": 1.26, "learning_rate": 6.532080567213604e-08, "logits/chosen": -2.3999335765838623, "logits/rejected": -2.3939056396484375, "logps/chosen": -10.134496688842773, "logps/rejected": -4.290919303894043, "loss": 0.6492, "rewards/accuracies": 1.0, "rewards/chosen": 0.7133602499961853, "rewards/margins": 0.15700942277908325, "rewards/rejected": 0.556350827217102, "step": 695 }, { "epoch": 1.26, "learning_rate": 6.522784692166037e-08, "logits/chosen": -2.4347469806671143, "logits/rejected": -2.4706506729125977, "logps/chosen": -2.5956830978393555, "logps/rejected": -18.288883209228516, "loss": 0.8874, "rewards/accuracies": 0.0, "rewards/chosen": 0.6010783314704895, "rewards/margins": -0.23488378524780273, "rewards/rejected": 0.8359621167182922, "step": 696 }, { "epoch": 1.26, "learning_rate": 6.513483011800375e-08, "logits/chosen": -2.2504725456237793, "logits/rejected": -2.3330538272857666, "logps/chosen": -2.3568735122680664, "logps/rejected": -23.892444610595703, "loss": 0.6561, "rewards/accuracies": 1.0, "rewards/chosen": 0.5247216820716858, "rewards/margins": 0.091562420129776, "rewards/rejected": 0.4331592619419098, "step": 697 }, { "epoch": 1.26, "learning_rate": 6.504175561577444e-08, "logits/chosen": -2.1679553985595703, "logits/rejected": -2.2354676723480225, "logps/chosen": -2.919010877609253, "logps/rejected": -22.921749114990234, "loss": 0.6385, "rewards/accuracies": 0.0, "rewards/chosen": 0.48677656054496765, "rewards/margins": -0.07039698958396912, "rewards/rejected": 0.5571735501289368, "step": 698 }, { "epoch": 1.26, "learning_rate": 6.494862376980076e-08, "logits/chosen": -2.414827346801758, "logits/rejected": -2.414827346801758, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 699 }, { "epoch": 1.27, "learning_rate": 6.485543493512966e-08, "logits/chosen": -2.229840040206909, "logits/rejected": -2.2268762588500977, "logps/chosen": -0.8651479482650757, "logps/rejected": -3.930243730545044, "loss": 0.5521, "rewards/accuracies": 1.0, "rewards/chosen": 0.5472320914268494, "rewards/margins": 0.08929619193077087, "rewards/rejected": 0.4579358994960785, "step": 700 }, { "epoch": 1.27, "learning_rate": 6.476218946702523e-08, "logits/chosen": -2.198732852935791, "logits/rejected": -2.2195773124694824, "logps/chosen": -1.7281825542449951, "logps/rejected": -11.871305465698242, "loss": 0.6679, "rewards/accuracies": 1.0, "rewards/chosen": 0.7149623036384583, "rewards/margins": 0.28425532579421997, "rewards/rejected": 0.4307069778442383, "step": 701 }, { "epoch": 1.27, "learning_rate": 6.466888772096756e-08, "logits/chosen": -2.33260440826416, "logits/rejected": -2.519503355026245, "logps/chosen": -1.9596524238586426, "logps/rejected": -52.05574417114258, "loss": 0.7082, "rewards/accuracies": 1.0, "rewards/chosen": 0.850756824016571, "rewards/margins": 0.5661525726318359, "rewards/rejected": 0.2846042811870575, "step": 702 }, { "epoch": 1.27, "learning_rate": 6.457553005265129e-08, "logits/chosen": -2.2864108085632324, "logits/rejected": -2.3697164058685303, "logps/chosen": -1.4099262952804565, "logps/rejected": -31.572751998901367, "loss": 0.6379, "rewards/accuracies": 1.0, "rewards/chosen": 0.6804370284080505, "rewards/margins": 0.41606077551841736, "rewards/rejected": 0.2643762528896332, "step": 703 }, { "epoch": 1.27, "learning_rate": 6.448211681798418e-08, "logits/chosen": -2.3391242027282715, "logits/rejected": -2.3348183631896973, "logps/chosen": -1.580553412437439, "logps/rejected": -15.825445175170898, "loss": 0.6395, "rewards/accuracies": 1.0, "rewards/chosen": 0.8465308547019958, "rewards/margins": 0.6608901023864746, "rewards/rejected": 0.18564072251319885, "step": 704 }, { "epoch": 1.27, "learning_rate": 6.438864837308592e-08, "logits/chosen": -2.230679512023926, "logits/rejected": -2.226699113845825, "logps/chosen": -1.5861636400222778, "logps/rejected": -6.492067337036133, "loss": 0.8039, "rewards/accuracies": 0.0, "rewards/chosen": 0.4958969056606293, "rewards/margins": -0.05392369627952576, "rewards/rejected": 0.549820601940155, "step": 705 }, { "epoch": 1.28, "learning_rate": 6.429512507428663e-08, "logits/chosen": -2.296107053756714, "logits/rejected": -2.2927825450897217, "logps/chosen": -1.5003926753997803, "logps/rejected": -6.382221221923828, "loss": 0.7963, "rewards/accuracies": 0.0, "rewards/chosen": 0.5394501686096191, "rewards/margins": -0.23379528522491455, "rewards/rejected": 0.7732454538345337, "step": 706 }, { "epoch": 1.28, "learning_rate": 6.420154727812551e-08, "logits/chosen": -2.3247692584991455, "logits/rejected": -2.3287365436553955, "logps/chosen": -2.583890438079834, "logps/rejected": -1.7638273239135742, "loss": 0.5688, "rewards/accuracies": 1.0, "rewards/chosen": 0.7940701842308044, "rewards/margins": 0.22715258598327637, "rewards/rejected": 0.5669175982475281, "step": 707 }, { "epoch": 1.28, "learning_rate": 6.41079153413496e-08, "logits/chosen": -2.171710729598999, "logits/rejected": -2.165325880050659, "logps/chosen": -9.448663711547852, "logps/rejected": -3.298196315765381, "loss": 0.7077, "rewards/accuracies": 0.0, "rewards/chosen": 0.5536175966262817, "rewards/margins": -0.13332217931747437, "rewards/rejected": 0.6869397759437561, "step": 708 }, { "epoch": 1.28, "learning_rate": 6.401422962091228e-08, "logits/chosen": -2.259683609008789, "logits/rejected": -2.259568452835083, "logps/chosen": -2.2207396030426025, "logps/rejected": -2.8398866653442383, "loss": 0.6013, "rewards/accuracies": 1.0, "rewards/chosen": 0.7789344787597656, "rewards/margins": 0.2962247431278229, "rewards/rejected": 0.48270973563194275, "step": 709 }, { "epoch": 1.28, "learning_rate": 6.392049047397203e-08, "logits/chosen": -2.3002007007598877, "logits/rejected": -2.355971574783325, "logps/chosen": -4.225551605224609, "logps/rejected": -24.473344802856445, "loss": 0.5759, "rewards/accuracies": 1.0, "rewards/chosen": 0.8507519960403442, "rewards/margins": 0.33548498153686523, "rewards/rejected": 0.515267014503479, "step": 710 }, { "epoch": 1.29, "learning_rate": 6.382669825789092e-08, "logits/chosen": -2.3335120677948, "logits/rejected": -2.3369834423065186, "logps/chosen": -1.2543364763259888, "logps/rejected": -2.739046573638916, "loss": 0.6572, "rewards/accuracies": 1.0, "rewards/chosen": 0.7235608100891113, "rewards/margins": 0.037897348403930664, "rewards/rejected": 0.6856634616851807, "step": 711 }, { "epoch": 1.29, "learning_rate": 6.37328533302334e-08, "logits/chosen": -2.404639482498169, "logits/rejected": -2.406589984893799, "logps/chosen": -4.3496928215026855, "logps/rejected": -3.501251459121704, "loss": 0.6306, "rewards/accuracies": 1.0, "rewards/chosen": 0.924482524394989, "rewards/margins": 0.35137420892715454, "rewards/rejected": 0.5731083154678345, "step": 712 }, { "epoch": 1.29, "learning_rate": 6.363895604876488e-08, "logits/chosen": -2.2573680877685547, "logits/rejected": -2.266676664352417, "logps/chosen": -1.179031491279602, "logps/rejected": -2.5496811866760254, "loss": 0.6045, "rewards/accuracies": 1.0, "rewards/chosen": 0.6438769102096558, "rewards/margins": 0.058766961097717285, "rewards/rejected": 0.5851099491119385, "step": 713 }, { "epoch": 1.29, "learning_rate": 6.354500677145032e-08, "logits/chosen": -2.201305627822876, "logits/rejected": -2.1999025344848633, "logps/chosen": -1.4572274684906006, "logps/rejected": -4.7391180992126465, "loss": 0.6603, "rewards/accuracies": 1.0, "rewards/chosen": 0.5352990031242371, "rewards/margins": 0.17452099919319153, "rewards/rejected": 0.36077800393104553, "step": 714 }, { "epoch": 1.29, "learning_rate": 6.345100585645293e-08, "logits/chosen": -2.462416648864746, "logits/rejected": -2.468484878540039, "logps/chosen": -1.6485319137573242, "logps/rejected": -2.3843905925750732, "loss": 0.6653, "rewards/accuracies": 1.0, "rewards/chosen": 0.80464106798172, "rewards/margins": 0.1678069829940796, "rewards/rejected": 0.6368340849876404, "step": 715 }, { "epoch": 1.29, "learning_rate": 6.335695366213277e-08, "logits/chosen": -2.3135104179382324, "logits/rejected": -2.3167576789855957, "logps/chosen": -4.092587947845459, "logps/rejected": -2.9840922355651855, "loss": 0.7436, "rewards/accuracies": 0.0, "rewards/chosen": 0.5443300008773804, "rewards/margins": -0.05363708734512329, "rewards/rejected": 0.5979670882225037, "step": 716 }, { "epoch": 1.3, "learning_rate": 6.326285054704538e-08, "logits/chosen": -2.3284342288970947, "logits/rejected": -2.3438220024108887, "logps/chosen": -1.7106722593307495, "logps/rejected": -7.272143840789795, "loss": 0.7363, "rewards/accuracies": 1.0, "rewards/chosen": 0.7312830090522766, "rewards/margins": 0.14624732732772827, "rewards/rejected": 0.5850356817245483, "step": 717 }, { "epoch": 1.3, "learning_rate": 6.316869686994045e-08, "logits/chosen": -2.399928331375122, "logits/rejected": -2.40781307220459, "logps/chosen": -2.6004226207733154, "logps/rejected": -18.306293487548828, "loss": 0.6236, "rewards/accuracies": 1.0, "rewards/chosen": 0.6239334940910339, "rewards/margins": 0.17885121703147888, "rewards/rejected": 0.44508227705955505, "step": 718 }, { "epoch": 1.3, "learning_rate": 6.30744929897604e-08, "logits/chosen": -2.3652331829071045, "logits/rejected": -2.3873190879821777, "logps/chosen": -5.761752128601074, "logps/rejected": -13.115777969360352, "loss": 0.7133, "rewards/accuracies": 0.0, "rewards/chosen": 0.5679482817649841, "rewards/margins": -0.2846977710723877, "rewards/rejected": 0.8526460528373718, "step": 719 }, { "epoch": 1.3, "learning_rate": 6.298023926563908e-08, "logits/chosen": -2.442636489868164, "logits/rejected": -2.318601608276367, "logps/chosen": -20.435287475585938, "logps/rejected": -6.920109748840332, "loss": 0.5595, "rewards/accuracies": 1.0, "rewards/chosen": 0.8965652585029602, "rewards/margins": 0.3379868268966675, "rewards/rejected": 0.5585784316062927, "step": 720 }, { "epoch": 1.3, "learning_rate": 6.28859360569003e-08, "logits/chosen": -2.3051443099975586, "logits/rejected": -2.3358843326568604, "logps/chosen": -3.7746293544769287, "logps/rejected": -12.232810020446777, "loss": 0.6896, "rewards/accuracies": 0.0, "rewards/chosen": 0.43926793336868286, "rewards/margins": -0.4507567286491394, "rewards/rejected": 0.8900246620178223, "step": 721 }, { "epoch": 1.31, "learning_rate": 6.279158372305658e-08, "logits/chosen": -2.3302059173583984, "logits/rejected": -2.3329405784606934, "logps/chosen": -1.3627949953079224, "logps/rejected": -1.5924386978149414, "loss": 0.6195, "rewards/accuracies": 1.0, "rewards/chosen": 0.5883762240409851, "rewards/margins": 0.11175224184989929, "rewards/rejected": 0.4766239821910858, "step": 722 }, { "epoch": 1.31, "learning_rate": 6.26971826238077e-08, "logits/chosen": -2.382666826248169, "logits/rejected": -2.3899741172790527, "logps/chosen": -2.7262074947357178, "logps/rejected": -13.571250915527344, "loss": 0.5796, "rewards/accuracies": 1.0, "rewards/chosen": 0.9290698170661926, "rewards/margins": 0.4429829716682434, "rewards/rejected": 0.4860868453979492, "step": 723 }, { "epoch": 1.31, "learning_rate": 6.260273311903934e-08, "logits/chosen": -2.290846586227417, "logits/rejected": -2.4322962760925293, "logps/chosen": -1.967942476272583, "logps/rejected": -40.51221466064453, "loss": 0.5669, "rewards/accuracies": 1.0, "rewards/chosen": 0.6583720445632935, "rewards/margins": 0.8156039714813232, "rewards/rejected": -0.1572319120168686, "step": 724 }, { "epoch": 1.31, "learning_rate": 6.25082355688217e-08, "logits/chosen": -2.3595240116119385, "logits/rejected": -2.3578779697418213, "logps/chosen": -3.4261016845703125, "logps/rejected": -3.1473608016967773, "loss": 0.5525, "rewards/accuracies": 1.0, "rewards/chosen": 0.8020873069763184, "rewards/margins": 0.11975038051605225, "rewards/rejected": 0.6823369264602661, "step": 725 }, { "epoch": 1.31, "learning_rate": 6.241369033340817e-08, "logits/chosen": -2.330796718597412, "logits/rejected": -2.3314895629882812, "logps/chosen": -1.209855318069458, "logps/rejected": -2.100620746612549, "loss": 0.5346, "rewards/accuracies": 0.0, "rewards/chosen": 0.7852663993835449, "rewards/margins": -0.012991130352020264, "rewards/rejected": 0.7982575297355652, "step": 726 }, { "epoch": 1.31, "learning_rate": 6.231909777323396e-08, "logits/chosen": -2.284675359725952, "logits/rejected": -2.3687679767608643, "logps/chosen": -2.7551870346069336, "logps/rejected": -41.40277099609375, "loss": 0.4182, "rewards/accuracies": 1.0, "rewards/chosen": 0.8907291293144226, "rewards/margins": 1.114675521850586, "rewards/rejected": -0.22394637763500214, "step": 727 }, { "epoch": 1.32, "learning_rate": 6.222445824891462e-08, "logits/chosen": -2.3324532508850098, "logits/rejected": -2.3256418704986572, "logps/chosen": -3.2880868911743164, "logps/rejected": -2.9252774715423584, "loss": 0.5798, "rewards/accuracies": 0.0, "rewards/chosen": 0.5396601557731628, "rewards/margins": -0.0651700496673584, "rewards/rejected": 0.6048302054405212, "step": 728 }, { "epoch": 1.32, "learning_rate": 6.212977212124478e-08, "logits/chosen": -2.3984925746917725, "logits/rejected": -2.3975841999053955, "logps/chosen": -2.4464683532714844, "logps/rejected": -2.423981189727783, "loss": 0.7293, "rewards/accuracies": 0.0, "rewards/chosen": 0.45168429613113403, "rewards/margins": -0.14241337776184082, "rewards/rejected": 0.5940976738929749, "step": 729 }, { "epoch": 1.32, "learning_rate": 6.203503975119678e-08, "logits/chosen": -2.3913731575012207, "logits/rejected": -2.4060111045837402, "logps/chosen": -6.5992913246154785, "logps/rejected": -9.799210548400879, "loss": 0.6458, "rewards/accuracies": 1.0, "rewards/chosen": 0.8714128732681274, "rewards/margins": 0.5751627087593079, "rewards/rejected": 0.2962501645088196, "step": 730 }, { "epoch": 1.32, "learning_rate": 6.194026149991915e-08, "logits/chosen": -2.3479936122894287, "logits/rejected": -2.3506269454956055, "logps/chosen": -3.8995401859283447, "logps/rejected": -8.456439971923828, "loss": 0.7471, "rewards/accuracies": 0.0, "rewards/chosen": 0.8030602335929871, "rewards/margins": -0.1816086769104004, "rewards/rejected": 0.9846689105033875, "step": 731 }, { "epoch": 1.32, "learning_rate": 6.184543772873546e-08, "logits/chosen": -2.316549777984619, "logits/rejected": -2.338496446609497, "logps/chosen": -1.8643015623092651, "logps/rejected": -16.8817138671875, "loss": 0.6508, "rewards/accuracies": 0.0, "rewards/chosen": 0.603485107421875, "rewards/margins": -0.17197877168655396, "rewards/rejected": 0.775463879108429, "step": 732 }, { "epoch": 1.33, "learning_rate": 6.175056879914269e-08, "logits/chosen": -2.327301263809204, "logits/rejected": -2.326035499572754, "logps/chosen": -19.07762336730957, "logps/rejected": -16.521072387695312, "loss": 0.6471, "rewards/accuracies": 1.0, "rewards/chosen": 0.42124462127685547, "rewards/margins": 0.10335978865623474, "rewards/rejected": 0.3178848326206207, "step": 733 }, { "epoch": 1.33, "learning_rate": 6.165565507281008e-08, "logits/chosen": -2.268494129180908, "logits/rejected": -2.250288724899292, "logps/chosen": -5.156248092651367, "logps/rejected": -2.905980348587036, "loss": 0.6988, "rewards/accuracies": 1.0, "rewards/chosen": 0.7758522033691406, "rewards/margins": 0.35017338395118713, "rewards/rejected": 0.4256788194179535, "step": 734 }, { "epoch": 1.33, "learning_rate": 6.156069691157756e-08, "logits/chosen": -2.376866102218628, "logits/rejected": -2.3790626525878906, "logps/chosen": -3.6068766117095947, "logps/rejected": -7.271340370178223, "loss": 0.7319, "rewards/accuracies": 0.0, "rewards/chosen": 0.6267809271812439, "rewards/margins": -0.14401710033416748, "rewards/rejected": 0.7707980275154114, "step": 735 }, { "epoch": 1.33, "learning_rate": 6.146569467745453e-08, "logits/chosen": -2.3812243938446045, "logits/rejected": -2.414785146713257, "logps/chosen": -5.538195610046387, "logps/rejected": -26.36650276184082, "loss": 0.6411, "rewards/accuracies": 1.0, "rewards/chosen": 0.9945279359817505, "rewards/margins": 0.19537955522537231, "rewards/rejected": 0.7991483807563782, "step": 736 }, { "epoch": 1.33, "learning_rate": 6.137064873261838e-08, "logits/chosen": -2.3311171531677246, "logits/rejected": -2.3881428241729736, "logps/chosen": -3.4950504302978516, "logps/rejected": -26.820222854614258, "loss": 0.5501, "rewards/accuracies": 1.0, "rewards/chosen": 0.7495837211608887, "rewards/margins": 0.5343306660652161, "rewards/rejected": 0.2152530699968338, "step": 737 }, { "epoch": 1.33, "learning_rate": 6.127555943941314e-08, "logits/chosen": -2.290235996246338, "logits/rejected": -2.2958502769470215, "logps/chosen": -3.137310028076172, "logps/rejected": -6.404566287994385, "loss": 0.8315, "rewards/accuracies": 0.0, "rewards/chosen": 0.5587393641471863, "rewards/margins": -0.4997428059577942, "rewards/rejected": 1.0584821701049805, "step": 738 }, { "epoch": 1.34, "learning_rate": 6.11804271603481e-08, "logits/chosen": -2.2939565181732178, "logits/rejected": -2.2934093475341797, "logps/chosen": -1.3914330005645752, "logps/rejected": -5.726857662200928, "loss": 0.7248, "rewards/accuracies": 1.0, "rewards/chosen": 0.6181179881095886, "rewards/margins": 0.2611190974712372, "rewards/rejected": 0.35699889063835144, "step": 739 }, { "epoch": 1.34, "learning_rate": 6.108525225809641e-08, "logits/chosen": -2.28426194190979, "logits/rejected": -2.2838943004608154, "logps/chosen": -2.060929775238037, "logps/rejected": -3.714320659637451, "loss": 0.5863, "rewards/accuracies": 1.0, "rewards/chosen": 0.6877424716949463, "rewards/margins": 0.12886273860931396, "rewards/rejected": 0.5588797330856323, "step": 740 }, { "epoch": 1.34, "learning_rate": 6.099003509549375e-08, "logits/chosen": -2.4411041736602783, "logits/rejected": -2.440920352935791, "logps/chosen": -4.555419921875, "logps/rejected": -7.6491546630859375, "loss": 0.6006, "rewards/accuracies": 0.0, "rewards/chosen": 0.7314767241477966, "rewards/margins": -0.26577669382095337, "rewards/rejected": 0.99725341796875, "step": 741 }, { "epoch": 1.34, "learning_rate": 6.089477603553684e-08, "logits/chosen": -2.316351890563965, "logits/rejected": -2.319572687149048, "logps/chosen": -6.948067665100098, "logps/rejected": -6.336682319641113, "loss": 0.493, "rewards/accuracies": 1.0, "rewards/chosen": 0.9109784364700317, "rewards/margins": 0.6678205728530884, "rewards/rejected": 0.24315786361694336, "step": 742 }, { "epoch": 1.34, "learning_rate": 6.079947544138224e-08, "logits/chosen": -2.3947839736938477, "logits/rejected": -2.4864578247070312, "logps/chosen": -6.561782360076904, "logps/rejected": -26.35103416442871, "loss": 0.6977, "rewards/accuracies": 0.0, "rewards/chosen": 0.5671932101249695, "rewards/margins": -0.33370494842529297, "rewards/rejected": 0.9008981585502625, "step": 743 }, { "epoch": 1.35, "learning_rate": 6.070413367634472e-08, "logits/chosen": -2.209916114807129, "logits/rejected": -2.2361629009246826, "logps/chosen": -1.7322800159454346, "logps/rejected": -10.168303489685059, "loss": 0.6968, "rewards/accuracies": 1.0, "rewards/chosen": 0.7651833891868591, "rewards/margins": 0.39226067066192627, "rewards/rejected": 0.37292271852493286, "step": 744 }, { "epoch": 1.35, "learning_rate": 6.06087511038961e-08, "logits/chosen": -2.1707613468170166, "logits/rejected": -2.2266743183135986, "logps/chosen": -2.32080078125, "logps/rejected": -26.695640563964844, "loss": 0.54, "rewards/accuracies": 1.0, "rewards/chosen": 0.7329697012901306, "rewards/margins": 0.39982131123542786, "rewards/rejected": 0.33314839005470276, "step": 745 }, { "epoch": 1.35, "learning_rate": 6.051332808766373e-08, "logits/chosen": -2.2886359691619873, "logits/rejected": -2.278695583343506, "logps/chosen": -4.970214366912842, "logps/rejected": -5.763737201690674, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.43469834327697754, "rewards/margins": -0.26281166076660156, "rewards/rejected": 0.6975100040435791, "step": 746 }, { "epoch": 1.35, "learning_rate": 6.041786499142916e-08, "logits/chosen": -2.4053711891174316, "logits/rejected": -2.4018208980560303, "logps/chosen": -0.9261355400085449, "logps/rejected": -6.3883562088012695, "loss": 0.6136, "rewards/accuracies": 1.0, "rewards/chosen": 0.6889646649360657, "rewards/margins": 0.17808645963668823, "rewards/rejected": 0.5108782052993774, "step": 747 }, { "epoch": 1.35, "learning_rate": 6.032236217912668e-08, "logits/chosen": -2.283905267715454, "logits/rejected": -2.2940378189086914, "logps/chosen": -2.65930438041687, "logps/rejected": -2.1486032009124756, "loss": 0.7466, "rewards/accuracies": 1.0, "rewards/chosen": 0.7753996253013611, "rewards/margins": 0.10565227270126343, "rewards/rejected": 0.6697473526000977, "step": 748 }, { "epoch": 1.35, "learning_rate": 6.022682001484212e-08, "logits/chosen": -2.2422239780426025, "logits/rejected": -2.2436578273773193, "logps/chosen": -3.8506531715393066, "logps/rejected": -9.627765655517578, "loss": 0.6817, "rewards/accuracies": 0.0, "rewards/chosen": 0.6838735342025757, "rewards/margins": -0.3637939691543579, "rewards/rejected": 1.0476675033569336, "step": 749 }, { "epoch": 1.36, "learning_rate": 6.01312388628112e-08, "logits/chosen": -2.367804765701294, "logits/rejected": -2.370253801345825, "logps/chosen": -2.576688766479492, "logps/rejected": -8.475749969482422, "loss": 0.7649, "rewards/accuracies": 0.0, "rewards/chosen": 0.5411468744277954, "rewards/margins": -0.3795047402381897, "rewards/rejected": 0.9206516146659851, "step": 750 }, { "epoch": 1.36, "learning_rate": 6.003561908741832e-08, "logits/chosen": -2.522319793701172, "logits/rejected": -2.515125036239624, "logps/chosen": -3.520310878753662, "logps/rejected": -5.507262706756592, "loss": 0.5583, "rewards/accuracies": 1.0, "rewards/chosen": 0.6211599707603455, "rewards/margins": 0.3032485842704773, "rewards/rejected": 0.31791138648986816, "step": 751 }, { "epoch": 1.36, "learning_rate": 5.993996105319515e-08, "logits/chosen": -2.17759370803833, "logits/rejected": -2.179115056991577, "logps/chosen": -2.1435749530792236, "logps/rejected": -9.267136573791504, "loss": 0.8101, "rewards/accuracies": 0.0, "rewards/chosen": 0.5458208918571472, "rewards/margins": -0.4844056963920593, "rewards/rejected": 1.0302265882492065, "step": 752 }, { "epoch": 1.36, "learning_rate": 5.984426512481918e-08, "logits/chosen": -2.306248664855957, "logits/rejected": -2.2975292205810547, "logps/chosen": -7.329416275024414, "logps/rejected": -3.048764705657959, "loss": 0.5497, "rewards/accuracies": 1.0, "rewards/chosen": 0.7176489233970642, "rewards/margins": 0.16410207748413086, "rewards/rejected": 0.5535468459129333, "step": 753 }, { "epoch": 1.36, "learning_rate": 5.974853166711238e-08, "logits/chosen": -2.325350761413574, "logits/rejected": -2.3339731693267822, "logps/chosen": -4.9127373695373535, "logps/rejected": -2.9105916023254395, "loss": 0.5932, "rewards/accuracies": 0.0, "rewards/chosen": 0.45798030495643616, "rewards/margins": -0.16266480088233948, "rewards/rejected": 0.6206451058387756, "step": 754 }, { "epoch": 1.37, "learning_rate": 5.965276104503978e-08, "logits/chosen": -2.2592103481292725, "logits/rejected": -2.2974486351013184, "logps/chosen": 0.0, "logps/rejected": -2.281920909881592, "loss": 1.0004, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": -0.8582983016967773, "rewards/rejected": 0.8582983016967773, "step": 755 }, { "epoch": 1.37, "learning_rate": 5.9556953623708115e-08, "logits/chosen": -2.345808744430542, "logits/rejected": -2.344761371612549, "logps/chosen": -6.984448432922363, "logps/rejected": -6.705207824707031, "loss": 0.6377, "rewards/accuracies": 1.0, "rewards/chosen": 0.48285484313964844, "rewards/margins": 0.1241324245929718, "rewards/rejected": 0.35872241854667664, "step": 756 }, { "epoch": 1.37, "learning_rate": 5.946110976836438e-08, "logits/chosen": -2.3650665283203125, "logits/rejected": -2.363467216491699, "logps/chosen": -1.149834156036377, "logps/rejected": -2.4589898586273193, "loss": 0.6763, "rewards/accuracies": 1.0, "rewards/chosen": 0.8252319693565369, "rewards/margins": 0.20872414112091064, "rewards/rejected": 0.6165078282356262, "step": 757 }, { "epoch": 1.37, "learning_rate": 5.936522984439449e-08, "logits/chosen": -2.3988871574401855, "logits/rejected": -2.39115834236145, "logps/chosen": -11.970252990722656, "logps/rejected": -3.0974960327148438, "loss": 0.7786, "rewards/accuracies": 1.0, "rewards/chosen": 0.8556257486343384, "rewards/margins": 0.33574914932250977, "rewards/rejected": 0.5198765993118286, "step": 758 }, { "epoch": 1.37, "learning_rate": 5.926931421732187e-08, "logits/chosen": -2.234856367111206, "logits/rejected": -2.2415449619293213, "logps/chosen": -3.9525177478790283, "logps/rejected": -5.724009990692139, "loss": 0.637, "rewards/accuracies": 1.0, "rewards/chosen": 0.724855899810791, "rewards/margins": 0.34709781408309937, "rewards/rejected": 0.37775808572769165, "step": 759 }, { "epoch": 1.37, "learning_rate": 5.917336325280604e-08, "logits/chosen": -2.2076497077941895, "logits/rejected": -2.2162370681762695, "logps/chosen": -2.1997690200805664, "logps/rejected": -4.678686141967773, "loss": 0.7244, "rewards/accuracies": 1.0, "rewards/chosen": 0.7361931800842285, "rewards/margins": 0.06538587808609009, "rewards/rejected": 0.6708073019981384, "step": 760 }, { "epoch": 1.38, "learning_rate": 5.907737731664121e-08, "logits/chosen": -2.2474355697631836, "logits/rejected": -2.2483599185943604, "logps/chosen": -4.18364143371582, "logps/rejected": -3.1918864250183105, "loss": 0.4795, "rewards/accuracies": 1.0, "rewards/chosen": 0.8160921931266785, "rewards/margins": 0.28339219093322754, "rewards/rejected": 0.5327000021934509, "step": 761 }, { "epoch": 1.38, "learning_rate": 5.898135677475499e-08, "logits/chosen": -2.4005684852600098, "logits/rejected": -2.3946328163146973, "logps/chosen": -2.8066821098327637, "logps/rejected": -4.958557605743408, "loss": 0.7685, "rewards/accuracies": 0.0, "rewards/chosen": 0.538398802280426, "rewards/margins": -0.3479180932044983, "rewards/rejected": 0.8863168954849243, "step": 762 }, { "epoch": 1.38, "learning_rate": 5.8885301993206847e-08, "logits/chosen": -2.2837305068969727, "logits/rejected": -2.2886807918548584, "logps/chosen": -0.7674643993377686, "logps/rejected": -8.943198204040527, "loss": 0.7434, "rewards/accuracies": 0.0, "rewards/chosen": 0.6049962043762207, "rewards/margins": -0.36210501194000244, "rewards/rejected": 0.9671012163162231, "step": 763 }, { "epoch": 1.38, "learning_rate": 5.8789213338186794e-08, "logits/chosen": -2.2843093872070312, "logits/rejected": -2.295133352279663, "logps/chosen": -3.356541395187378, "logps/rejected": -6.240007400512695, "loss": 0.5774, "rewards/accuracies": 1.0, "rewards/chosen": 0.6339295506477356, "rewards/margins": 0.2951068878173828, "rewards/rejected": 0.3388226628303528, "step": 764 }, { "epoch": 1.38, "learning_rate": 5.869309117601402e-08, "logits/chosen": -2.354124069213867, "logits/rejected": -2.325836420059204, "logps/chosen": -2.5114946365356445, "logps/rejected": 0.0, "loss": 0.4853, "rewards/accuracies": 1.0, "rewards/chosen": 0.8244418501853943, "rewards/margins": 0.8244418501853943, "rewards/rejected": 0.0, "step": 765 }, { "epoch": 1.39, "learning_rate": 5.8596935873135413e-08, "logits/chosen": -2.2558062076568604, "logits/rejected": -2.327768564224243, "logps/chosen": -1.6898444890975952, "logps/rejected": -26.86074447631836, "loss": 0.6857, "rewards/accuracies": 1.0, "rewards/chosen": 0.7051329016685486, "rewards/margins": 0.03019428253173828, "rewards/rejected": 0.6749386191368103, "step": 766 }, { "epoch": 1.39, "learning_rate": 5.850074779612418e-08, "logits/chosen": -2.3646929264068604, "logits/rejected": -2.410228729248047, "logps/chosen": -3.63775897026062, "logps/rejected": -27.071687698364258, "loss": 0.4576, "rewards/accuracies": 1.0, "rewards/chosen": 0.6621381044387817, "rewards/margins": 1.0059850215911865, "rewards/rejected": -0.3438468873500824, "step": 767 }, { "epoch": 1.39, "learning_rate": 5.8404527311678555e-08, "logits/chosen": -2.277200937271118, "logits/rejected": -2.276431083679199, "logps/chosen": -3.053494930267334, "logps/rejected": -4.206569194793701, "loss": 0.5303, "rewards/accuracies": 1.0, "rewards/chosen": 0.8031242489814758, "rewards/margins": 0.35766416788101196, "rewards/rejected": 0.44546008110046387, "step": 768 }, { "epoch": 1.39, "learning_rate": 5.830827478662026e-08, "logits/chosen": -2.242692708969116, "logits/rejected": -2.2473013401031494, "logps/chosen": -3.053133249282837, "logps/rejected": -3.9253365993499756, "loss": 0.8485, "rewards/accuracies": 0.0, "rewards/chosen": 0.5668601393699646, "rewards/margins": -0.18546569347381592, "rewards/rejected": 0.7523258328437805, "step": 769 }, { "epoch": 1.39, "learning_rate": 5.8211990587893145e-08, "logits/chosen": -2.3628110885620117, "logits/rejected": -2.342312812805176, "logps/chosen": -4.990922451019287, "logps/rejected": -2.4084267616271973, "loss": 0.6357, "rewards/accuracies": 1.0, "rewards/chosen": 0.7790535688400269, "rewards/margins": 0.17502367496490479, "rewards/rejected": 0.6040298938751221, "step": 770 }, { "epoch": 1.39, "learning_rate": 5.811567508256188e-08, "logits/chosen": -2.3520619869232178, "logits/rejected": -2.3573741912841797, "logps/chosen": -2.393104076385498, "logps/rejected": -4.12042236328125, "loss": 0.6584, "rewards/accuracies": 1.0, "rewards/chosen": 0.5086113214492798, "rewards/margins": 0.11038628220558167, "rewards/rejected": 0.3982250392436981, "step": 771 }, { "epoch": 1.4, "learning_rate": 5.8019328637810426e-08, "logits/chosen": -2.370223045349121, "logits/rejected": -2.362545967102051, "logps/chosen": -14.974393844604492, "logps/rejected": -2.58955717086792, "loss": 0.787, "rewards/accuracies": 0.0, "rewards/chosen": 0.6622095108032227, "rewards/margins": -0.06665933132171631, "rewards/rejected": 0.728868842124939, "step": 772 }, { "epoch": 1.4, "learning_rate": 5.792295162094072e-08, "logits/chosen": -2.3675942420959473, "logits/rejected": -2.3556571006774902, "logps/chosen": -7.990748405456543, "logps/rejected": -2.560065507888794, "loss": 0.592, "rewards/accuracies": 1.0, "rewards/chosen": 0.8686200380325317, "rewards/margins": 0.4320811331272125, "rewards/rejected": 0.4365389049053192, "step": 773 }, { "epoch": 1.4, "learning_rate": 5.7826544399371236e-08, "logits/chosen": -2.3446106910705566, "logits/rejected": -2.3201210498809814, "logps/chosen": -1.5495747327804565, "logps/rejected": 0.0, "loss": 0.4178, "rewards/accuracies": 1.0, "rewards/chosen": 0.8115200400352478, "rewards/margins": 0.8115200400352478, "rewards/rejected": 0.0, "step": 774 }, { "epoch": 1.4, "learning_rate": 5.773010734063562e-08, "logits/chosen": -2.246009349822998, "logits/rejected": -2.243232011795044, "logps/chosen": -4.760146141052246, "logps/rejected": -2.5259552001953125, "loss": 0.7264, "rewards/accuracies": 0.0, "rewards/chosen": 0.5159960985183716, "rewards/margins": -0.1435442566871643, "rewards/rejected": 0.6595403552055359, "step": 775 }, { "epoch": 1.4, "learning_rate": 5.763364081238124e-08, "logits/chosen": -2.3500518798828125, "logits/rejected": -2.3527305126190186, "logps/chosen": -6.701728820800781, "logps/rejected": -9.15396499633789, "loss": 0.7572, "rewards/accuracies": 0.0, "rewards/chosen": 0.5941304564476013, "rewards/margins": -0.3021727204322815, "rewards/rejected": 0.8963031768798828, "step": 776 }, { "epoch": 1.41, "learning_rate": 5.753714518236785e-08, "logits/chosen": -2.2737700939178467, "logits/rejected": -2.2714688777923584, "logps/chosen": -0.8202630281448364, "logps/rejected": -7.216319561004639, "loss": 0.7394, "rewards/accuracies": 0.0, "rewards/chosen": 0.5113914608955383, "rewards/margins": -0.21050935983657837, "rewards/rejected": 0.7219008207321167, "step": 777 }, { "epoch": 1.41, "learning_rate": 5.744062081846608e-08, "logits/chosen": -2.4682629108428955, "logits/rejected": -2.4784719944000244, "logps/chosen": -2.6365597248077393, "logps/rejected": -8.073025703430176, "loss": 0.7286, "rewards/accuracies": 0.0, "rewards/chosen": 0.5145870447158813, "rewards/margins": -0.3620833158493042, "rewards/rejected": 0.8766703605651855, "step": 778 }, { "epoch": 1.41, "learning_rate": 5.7344068088656175e-08, "logits/chosen": -2.417955160140991, "logits/rejected": -2.531663179397583, "logps/chosen": -5.196290016174316, "logps/rejected": -29.165895462036133, "loss": 0.5812, "rewards/accuracies": 0.0, "rewards/chosen": 0.4712936580181122, "rewards/margins": -0.28399714827537537, "rewards/rejected": 0.7552908062934875, "step": 779 }, { "epoch": 1.41, "learning_rate": 5.724748736102647e-08, "logits/chosen": -2.2773752212524414, "logits/rejected": -2.276155710220337, "logps/chosen": -0.7699185609817505, "logps/rejected": -4.557404518127441, "loss": 0.6165, "rewards/accuracies": 1.0, "rewards/chosen": 0.4726617932319641, "rewards/margins": 0.3155439496040344, "rewards/rejected": 0.1571178436279297, "step": 780 }, { "epoch": 1.41, "learning_rate": 5.715087900377207e-08, "logits/chosen": -2.439976453781128, "logits/rejected": -2.461390495300293, "logps/chosen": -3.3820066452026367, "logps/rejected": -47.74576187133789, "loss": 0.3967, "rewards/accuracies": 1.0, "rewards/chosen": 0.6197931170463562, "rewards/margins": 1.2252213954925537, "rewards/rejected": -0.6054283380508423, "step": 781 }, { "epoch": 1.41, "learning_rate": 5.705424338519338e-08, "logits/chosen": -2.3029415607452393, "logits/rejected": -2.313551664352417, "logps/chosen": -1.6572542190551758, "logps/rejected": -1.4574289321899414, "loss": 0.6211, "rewards/accuracies": 1.0, "rewards/chosen": 0.6261846423149109, "rewards/margins": 0.15258821845054626, "rewards/rejected": 0.4735964238643646, "step": 782 }, { "epoch": 1.42, "learning_rate": 5.6957580873694757e-08, "logits/chosen": -2.31205153465271, "logits/rejected": -2.311750650405884, "logps/chosen": -4.655521392822266, "logps/rejected": -8.205206871032715, "loss": 1.0285, "rewards/accuracies": 0.0, "rewards/chosen": 0.5372897982597351, "rewards/margins": -0.44825279712677, "rewards/rejected": 0.9855425953865051, "step": 783 }, { "epoch": 1.42, "learning_rate": 5.68608918377831e-08, "logits/chosen": -2.3090813159942627, "logits/rejected": -2.3147897720336914, "logps/chosen": -1.4776976108551025, "logps/rejected": -2.1656429767608643, "loss": 0.5871, "rewards/accuracies": 1.0, "rewards/chosen": 0.7113510966300964, "rewards/margins": 0.17773759365081787, "rewards/rejected": 0.5336135029792786, "step": 784 }, { "epoch": 1.42, "learning_rate": 5.6764176646066355e-08, "logits/chosen": -2.2499167919158936, "logits/rejected": -2.2395904064178467, "logps/chosen": -2.238416910171509, "logps/rejected": -4.184837341308594, "loss": 0.6976, "rewards/accuracies": 1.0, "rewards/chosen": 0.8153401613235474, "rewards/margins": 0.15437793731689453, "rewards/rejected": 0.6609622240066528, "step": 785 }, { "epoch": 1.42, "learning_rate": 5.666743566725226e-08, "logits/chosen": -2.4374771118164062, "logits/rejected": -2.4350523948669434, "logps/chosen": -13.553116798400879, "logps/rejected": -8.3447904586792, "loss": 0.4949, "rewards/accuracies": 1.0, "rewards/chosen": 0.8642835021018982, "rewards/margins": 0.2878182530403137, "rewards/rejected": 0.5764652490615845, "step": 786 }, { "epoch": 1.42, "learning_rate": 5.657066927014683e-08, "logits/chosen": -2.302621364593506, "logits/rejected": -2.308711051940918, "logps/chosen": -1.5801571607589722, "logps/rejected": -2.5899877548217773, "loss": 0.6467, "rewards/accuracies": 1.0, "rewards/chosen": 0.6317148208618164, "rewards/margins": 0.19533532857894897, "rewards/rejected": 0.43637949228286743, "step": 787 }, { "epoch": 1.42, "learning_rate": 5.647387782365298e-08, "logits/chosen": -2.3219001293182373, "logits/rejected": -2.314610481262207, "logps/chosen": -5.468343734741211, "logps/rejected": -2.4134130477905273, "loss": 0.5338, "rewards/accuracies": 1.0, "rewards/chosen": 0.9590135812759399, "rewards/margins": 0.4565116763114929, "rewards/rejected": 0.502501904964447, "step": 788 }, { "epoch": 1.43, "learning_rate": 5.637706169676912e-08, "logits/chosen": -2.244107723236084, "logits/rejected": -2.2511236667633057, "logps/chosen": -2.081315517425537, "logps/rejected": -4.722174644470215, "loss": 0.6332, "rewards/accuracies": 1.0, "rewards/chosen": 0.6749531626701355, "rewards/margins": 0.15734505653381348, "rewards/rejected": 0.517608106136322, "step": 789 }, { "epoch": 1.43, "learning_rate": 5.628022125858777e-08, "logits/chosen": -2.4037044048309326, "logits/rejected": -2.400378465652466, "logps/chosen": -2.8667221069335938, "logps/rejected": -3.309720039367676, "loss": 0.699, "rewards/accuracies": 0.0, "rewards/chosen": 0.3124406039714813, "rewards/margins": -0.20689913630485535, "rewards/rejected": 0.5193397402763367, "step": 790 }, { "epoch": 1.43, "learning_rate": 5.6183356878294096e-08, "logits/chosen": -2.27476167678833, "logits/rejected": -2.2838289737701416, "logps/chosen": -1.6709433794021606, "logps/rejected": -3.7554028034210205, "loss": 0.7398, "rewards/accuracies": 1.0, "rewards/chosen": 0.7675597071647644, "rewards/margins": 0.03846937417984009, "rewards/rejected": 0.7290903329849243, "step": 791 }, { "epoch": 1.43, "learning_rate": 5.608646892516456e-08, "logits/chosen": -2.433199882507324, "logits/rejected": -2.434946060180664, "logps/chosen": -2.1511571407318115, "logps/rejected": -1.4002556800842285, "loss": 0.639, "rewards/accuracies": 0.0, "rewards/chosen": 0.5106000304222107, "rewards/margins": -0.016556978225708008, "rewards/rejected": 0.5271570086479187, "step": 792 }, { "epoch": 1.43, "learning_rate": 5.5989557768565484e-08, "logits/chosen": -2.213592052459717, "logits/rejected": -2.2225563526153564, "logps/chosen": -1.8792532682418823, "logps/rejected": -5.967944145202637, "loss": 0.844, "rewards/accuracies": 0.0, "rewards/chosen": 0.6809470057487488, "rewards/margins": -0.2965794801712036, "rewards/rejected": 0.9775264859199524, "step": 793 }, { "epoch": 1.44, "learning_rate": 5.5892623777951665e-08, "logits/chosen": -2.3575830459594727, "logits/rejected": -2.3539888858795166, "logps/chosen": -1.51247239112854, "logps/rejected": -6.771148681640625, "loss": 0.6335, "rewards/accuracies": 1.0, "rewards/chosen": 0.6277531981468201, "rewards/margins": 0.23825371265411377, "rewards/rejected": 0.3894994854927063, "step": 794 }, { "epoch": 1.44, "learning_rate": 5.5795667322864933e-08, "logits/chosen": -2.3702733516693115, "logits/rejected": -2.367039442062378, "logps/chosen": -2.6722989082336426, "logps/rejected": -3.4259088039398193, "loss": 0.6589, "rewards/accuracies": 1.0, "rewards/chosen": 0.6436378359794617, "rewards/margins": 0.25427958369255066, "rewards/rejected": 0.389358252286911, "step": 795 }, { "epoch": 1.44, "learning_rate": 5.5698688772932766e-08, "logits/chosen": -2.339219093322754, "logits/rejected": -2.525886058807373, "logps/chosen": -6.136077404022217, "logps/rejected": -39.412010192871094, "loss": 0.6442, "rewards/accuracies": 0.0, "rewards/chosen": 0.19900499284267426, "rewards/margins": -0.18145652115345, "rewards/rejected": 0.38046151399612427, "step": 796 }, { "epoch": 1.44, "learning_rate": 5.5601688497866885e-08, "logits/chosen": -2.3805153369903564, "logits/rejected": -2.383061170578003, "logps/chosen": -7.924501419067383, "logps/rejected": -7.807568073272705, "loss": 0.593, "rewards/accuracies": 1.0, "rewards/chosen": 0.9171738028526306, "rewards/margins": 0.4600512385368347, "rewards/rejected": 0.4571225643157959, "step": 797 }, { "epoch": 1.44, "learning_rate": 5.550466686746179e-08, "logits/chosen": -2.2684714794158936, "logits/rejected": -2.2726104259490967, "logps/chosen": -2.6768388748168945, "logps/rejected": -4.624614715576172, "loss": 0.5818, "rewards/accuracies": 1.0, "rewards/chosen": 0.7890520095825195, "rewards/margins": 0.2681794762611389, "rewards/rejected": 0.5208725333213806, "step": 798 }, { "epoch": 1.44, "learning_rate": 5.540762425159346e-08, "logits/chosen": -2.3968183994293213, "logits/rejected": -2.403545618057251, "logps/chosen": -1.8265515565872192, "logps/rejected": -4.661068916320801, "loss": 0.7627, "rewards/accuracies": 0.0, "rewards/chosen": 0.35681232810020447, "rewards/margins": -0.25143536925315857, "rewards/rejected": 0.608247697353363, "step": 799 }, { "epoch": 1.45, "learning_rate": 5.531056102021783e-08, "logits/chosen": -2.208991289138794, "logits/rejected": -2.1997203826904297, "logps/chosen": -1.8539676666259766, "logps/rejected": -3.907205581665039, "loss": 0.6064, "rewards/accuracies": 1.0, "rewards/chosen": 0.7043902277946472, "rewards/margins": 0.17072492837905884, "rewards/rejected": 0.5336652994155884, "step": 800 }, { "epoch": 1.45, "learning_rate": 5.521347754336942e-08, "logits/chosen": -2.2065589427948, "logits/rejected": -2.225965738296509, "logps/chosen": -1.0238656997680664, "logps/rejected": -14.305220603942871, "loss": 0.7918, "rewards/accuracies": 0.0, "rewards/chosen": 0.38742595911026, "rewards/margins": -0.4250384569168091, "rewards/rejected": 0.8124644160270691, "step": 801 }, { "epoch": 1.45, "learning_rate": 5.5116374191159986e-08, "logits/chosen": -2.308680534362793, "logits/rejected": -2.3055179119110107, "logps/chosen": -4.4361467361450195, "logps/rejected": -2.5768141746520996, "loss": 0.4658, "rewards/accuracies": 1.0, "rewards/chosen": 0.792045533657074, "rewards/margins": 0.37774285674095154, "rewards/rejected": 0.41430267691612244, "step": 802 }, { "epoch": 1.45, "learning_rate": 5.5019251333777014e-08, "logits/chosen": -2.332791328430176, "logits/rejected": -2.3322317600250244, "logps/chosen": -1.1181564331054688, "logps/rejected": -2.2403197288513184, "loss": 0.6192, "rewards/accuracies": 1.0, "rewards/chosen": 0.4696427285671234, "rewards/margins": 0.004822969436645508, "rewards/rejected": 0.4648197591304779, "step": 803 }, { "epoch": 1.45, "learning_rate": 5.492210934148234e-08, "logits/chosen": -2.3203940391540527, "logits/rejected": -2.3227040767669678, "logps/chosen": -6.675947189331055, "logps/rejected": -9.171845436096191, "loss": 0.7574, "rewards/accuracies": 0.0, "rewards/chosen": 0.5840513110160828, "rewards/margins": -0.2632451057434082, "rewards/rejected": 0.847296416759491, "step": 804 }, { "epoch": 1.46, "learning_rate": 5.4824948584610784e-08, "logits/chosen": -2.2102878093719482, "logits/rejected": -2.2167441844940186, "logps/chosen": -3.2493832111358643, "logps/rejected": -3.0074617862701416, "loss": 0.713, "rewards/accuracies": 0.0, "rewards/chosen": 0.8099223971366882, "rewards/margins": -0.10839074850082397, "rewards/rejected": 0.9183131456375122, "step": 805 }, { "epoch": 1.46, "learning_rate": 5.472776943356866e-08, "logits/chosen": -2.331880569458008, "logits/rejected": -2.3007071018218994, "logps/chosen": -2.7258920669555664, "logps/rejected": 0.0, "loss": 0.4334, "rewards/accuracies": 1.0, "rewards/chosen": 0.6949715614318848, "rewards/margins": 0.6949715614318848, "rewards/rejected": 0.0, "step": 806 }, { "epoch": 1.46, "learning_rate": 5.4630572258832454e-08, "logits/chosen": -2.2652978897094727, "logits/rejected": -2.342142343521118, "logps/chosen": -1.3779067993164062, "logps/rejected": -33.293270111083984, "loss": 0.3301, "rewards/accuracies": 1.0, "rewards/chosen": 0.967786967754364, "rewards/margins": 1.1011030673980713, "rewards/rejected": -0.1333160400390625, "step": 807 }, { "epoch": 1.46, "learning_rate": 5.4533357430947336e-08, "logits/chosen": -2.416811227798462, "logits/rejected": -2.4229090213775635, "logps/chosen": -2.51761794090271, "logps/rejected": -7.900903701782227, "loss": 0.6647, "rewards/accuracies": 0.0, "rewards/chosen": 0.9055169224739075, "rewards/margins": -0.1263323426246643, "rewards/rejected": 1.0318492650985718, "step": 808 }, { "epoch": 1.46, "learning_rate": 5.443612532052576e-08, "logits/chosen": -2.3302457332611084, "logits/rejected": -2.3352372646331787, "logps/chosen": -1.522733449935913, "logps/rejected": -10.830604553222656, "loss": 0.6264, "rewards/accuracies": 1.0, "rewards/chosen": 0.5451598167419434, "rewards/margins": 0.13912907242774963, "rewards/rejected": 0.4060307443141937, "step": 809 }, { "epoch": 1.46, "learning_rate": 5.433887629824609e-08, "logits/chosen": -2.367288827896118, "logits/rejected": -2.3686490058898926, "logps/chosen": -1.5886621475219727, "logps/rejected": -1.047074317932129, "loss": 0.6824, "rewards/accuracies": 0.0, "rewards/chosen": 0.5553874969482422, "rewards/margins": -0.0949813723564148, "rewards/rejected": 0.650368869304657, "step": 810 }, { "epoch": 1.47, "learning_rate": 5.424161073485119e-08, "logits/chosen": -2.3634331226348877, "logits/rejected": -2.360858201980591, "logps/chosen": -8.97290325164795, "logps/rejected": -3.908214807510376, "loss": 0.6271, "rewards/accuracies": 0.0, "rewards/chosen": 0.730323314666748, "rewards/margins": -0.05442190170288086, "rewards/rejected": 0.7847452163696289, "step": 811 }, { "epoch": 1.47, "learning_rate": 5.41443290011469e-08, "logits/chosen": -2.2350542545318604, "logits/rejected": -2.23478627204895, "logps/chosen": -1.693684458732605, "logps/rejected": -0.9726876020431519, "loss": 0.7053, "rewards/accuracies": 1.0, "rewards/chosen": 0.5878969430923462, "rewards/margins": 0.16250306367874146, "rewards/rejected": 0.42539387941360474, "step": 812 }, { "epoch": 1.47, "learning_rate": 5.404703146800078e-08, "logits/chosen": -2.4371464252471924, "logits/rejected": -2.427661180496216, "logps/chosen": -3.465757131576538, "logps/rejected": -2.333634853363037, "loss": 0.6659, "rewards/accuracies": 1.0, "rewards/chosen": 0.7895159125328064, "rewards/margins": 0.11223942041397095, "rewards/rejected": 0.6772764921188354, "step": 813 }, { "epoch": 1.47, "learning_rate": 5.394971850634064e-08, "logits/chosen": -2.3056352138519287, "logits/rejected": -2.3018300533294678, "logps/chosen": -1.3176239728927612, "logps/rejected": -5.078296184539795, "loss": 0.5858, "rewards/accuracies": 0.0, "rewards/chosen": 0.5988139510154724, "rewards/margins": -0.026400208473205566, "rewards/rejected": 0.625214159488678, "step": 814 }, { "epoch": 1.47, "learning_rate": 5.3852390487153e-08, "logits/chosen": -2.345909595489502, "logits/rejected": -2.3218960762023926, "logps/chosen": -0.68576979637146, "logps/rejected": 0.0, "loss": 0.5514, "rewards/accuracies": 1.0, "rewards/chosen": 0.5429635047912598, "rewards/margins": 0.5429635047912598, "rewards/rejected": 0.0, "step": 815 }, { "epoch": 1.48, "learning_rate": 5.375504778148189e-08, "logits/chosen": -2.3986172676086426, "logits/rejected": -2.403249502182007, "logps/chosen": -1.2807323932647705, "logps/rejected": -2.1200180053710938, "loss": 0.6571, "rewards/accuracies": 1.0, "rewards/chosen": 0.6242722868919373, "rewards/margins": 0.14992326498031616, "rewards/rejected": 0.4743490219116211, "step": 816 }, { "epoch": 1.48, "learning_rate": 5.3657690760427286e-08, "logits/chosen": -2.2283053398132324, "logits/rejected": -2.1988112926483154, "logps/chosen": -2.107357978820801, "logps/rejected": 0.0, "loss": 0.4586, "rewards/accuracies": 1.0, "rewards/chosen": 0.5606603622436523, "rewards/margins": 0.5606603622436523, "rewards/rejected": 0.0, "step": 817 }, { "epoch": 1.48, "learning_rate": 5.356031979514375e-08, "logits/chosen": -2.392622709274292, "logits/rejected": -2.4016518592834473, "logps/chosen": -1.785326600074768, "logps/rejected": -9.91224479675293, "loss": 0.7243, "rewards/accuracies": 1.0, "rewards/chosen": 0.831433892250061, "rewards/margins": 0.14809763431549072, "rewards/rejected": 0.6833362579345703, "step": 818 }, { "epoch": 1.48, "learning_rate": 5.3462935256838994e-08, "logits/chosen": -2.339730978012085, "logits/rejected": -2.352064609527588, "logps/chosen": -2.180698871612549, "logps/rejected": -5.931738376617432, "loss": 0.6521, "rewards/accuracies": 1.0, "rewards/chosen": 0.6792981624603271, "rewards/margins": 0.21284273266792297, "rewards/rejected": 0.4664554297924042, "step": 819 }, { "epoch": 1.48, "learning_rate": 5.336553751677246e-08, "logits/chosen": -2.39462947845459, "logits/rejected": -2.402000665664673, "logps/chosen": -3.104170322418213, "logps/rejected": -1.226153016090393, "loss": 0.7981, "rewards/accuracies": 0.0, "rewards/chosen": 0.4640345275402069, "rewards/margins": -0.10328981280326843, "rewards/rejected": 0.5673243403434753, "step": 820 }, { "epoch": 1.48, "learning_rate": 5.326812694625397e-08, "logits/chosen": -2.445509672164917, "logits/rejected": -2.428983449935913, "logps/chosen": -14.592547416687012, "logps/rejected": -3.6030609607696533, "loss": 0.688, "rewards/accuracies": 0.0, "rewards/chosen": 0.4630461633205414, "rewards/margins": -0.23934897780418396, "rewards/rejected": 0.7023951411247253, "step": 821 }, { "epoch": 1.49, "learning_rate": 5.31707039166422e-08, "logits/chosen": -2.3554787635803223, "logits/rejected": -2.3583946228027344, "logps/chosen": -2.5248284339904785, "logps/rejected": -2.792274236679077, "loss": 0.67, "rewards/accuracies": 1.0, "rewards/chosen": 0.7236551642417908, "rewards/margins": 0.060544610023498535, "rewards/rejected": 0.6631105542182922, "step": 822 }, { "epoch": 1.49, "learning_rate": 5.3073268799343365e-08, "logits/chosen": -2.4073445796966553, "logits/rejected": -2.404240131378174, "logps/chosen": -2.462796926498413, "logps/rejected": -6.039703369140625, "loss": 0.5583, "rewards/accuracies": 1.0, "rewards/chosen": 0.8417148590087891, "rewards/margins": 0.3006629943847656, "rewards/rejected": 0.5410518646240234, "step": 823 }, { "epoch": 1.49, "learning_rate": 5.2975821965809734e-08, "logits/chosen": -2.248419761657715, "logits/rejected": -2.249976396560669, "logps/chosen": -3.0269412994384766, "logps/rejected": -10.622191429138184, "loss": 0.7647, "rewards/accuracies": 0.0, "rewards/chosen": 0.7078558206558228, "rewards/margins": -0.25787991285324097, "rewards/rejected": 0.9657357335090637, "step": 824 }, { "epoch": 1.49, "learning_rate": 5.287836378753826e-08, "logits/chosen": -2.263871908187866, "logits/rejected": -2.355928897857666, "logps/chosen": -3.285465717315674, "logps/rejected": -25.796430587768555, "loss": 0.7602, "rewards/accuracies": 1.0, "rewards/chosen": 0.6904473900794983, "rewards/margins": 0.027236878871917725, "rewards/rejected": 0.6632105112075806, "step": 825 }, { "epoch": 1.49, "learning_rate": 5.278089463606916e-08, "logits/chosen": -2.2881810665130615, "logits/rejected": -2.293552875518799, "logps/chosen": -2.7142984867095947, "logps/rejected": -5.2109527587890625, "loss": 0.643, "rewards/accuracies": 1.0, "rewards/chosen": 0.7403766512870789, "rewards/margins": 0.3464583158493042, "rewards/rejected": 0.39391833543777466, "step": 826 }, { "epoch": 1.5, "learning_rate": 5.2683414882984456e-08, "logits/chosen": -2.3439996242523193, "logits/rejected": -2.3428800106048584, "logps/chosen": -2.7702696323394775, "logps/rejected": -5.789344787597656, "loss": 0.5128, "rewards/accuracies": 1.0, "rewards/chosen": 0.814130425453186, "rewards/margins": 0.2847477197647095, "rewards/rejected": 0.5293827056884766, "step": 827 }, { "epoch": 1.5, "learning_rate": 5.258592489990658e-08, "logits/chosen": -2.267913818359375, "logits/rejected": -2.271108627319336, "logps/chosen": -2.3419947624206543, "logps/rejected": -4.534208297729492, "loss": 0.7156, "rewards/accuracies": 0.0, "rewards/chosen": 0.47624287009239197, "rewards/margins": -0.08788612484931946, "rewards/rejected": 0.5641289949417114, "step": 828 }, { "epoch": 1.5, "learning_rate": 5.248842505849699e-08, "logits/chosen": -2.390791654586792, "logits/rejected": -2.3863701820373535, "logps/chosen": -1.7918485403060913, "logps/rejected": -9.343615531921387, "loss": 0.6702, "rewards/accuracies": 0.0, "rewards/chosen": 0.7577451467514038, "rewards/margins": -0.023684442043304443, "rewards/rejected": 0.7814295887947083, "step": 829 }, { "epoch": 1.5, "learning_rate": 5.239091573045471e-08, "logits/chosen": -2.2917866706848145, "logits/rejected": -2.418867349624634, "logps/chosen": -1.9758628606796265, "logps/rejected": -32.41851043701172, "loss": 0.5709, "rewards/accuracies": 1.0, "rewards/chosen": 0.5884309411048889, "rewards/margins": 0.1132810115814209, "rewards/rejected": 0.475149929523468, "step": 830 }, { "epoch": 1.5, "learning_rate": 5.229339728751496e-08, "logits/chosen": -2.22965669631958, "logits/rejected": -2.22076416015625, "logps/chosen": -8.142732620239258, "logps/rejected": -9.078545570373535, "loss": 0.8016, "rewards/accuracies": 0.0, "rewards/chosen": 0.4805983603000641, "rewards/margins": -0.3821741044521332, "rewards/rejected": 0.8627724647521973, "step": 831 }, { "epoch": 1.5, "learning_rate": 5.2195870101447645e-08, "logits/chosen": -2.402371644973755, "logits/rejected": -2.5454771518707275, "logps/chosen": -4.193721771240234, "logps/rejected": -32.35310363769531, "loss": 0.5855, "rewards/accuracies": 1.0, "rewards/chosen": 0.5403196215629578, "rewards/margins": 0.3633916676044464, "rewards/rejected": 0.17692795395851135, "step": 832 }, { "epoch": 1.51, "learning_rate": 5.20983345440561e-08, "logits/chosen": -2.3762240409851074, "logits/rejected": -2.374985456466675, "logps/chosen": -1.3973753452301025, "logps/rejected": -7.989764213562012, "loss": 0.6957, "rewards/accuracies": 0.0, "rewards/chosen": 0.6311947107315063, "rewards/margins": -0.2904496192932129, "rewards/rejected": 0.9216443300247192, "step": 833 }, { "epoch": 1.51, "learning_rate": 5.200079098717547e-08, "logits/chosen": -2.280641794204712, "logits/rejected": -2.4296188354492188, "logps/chosen": -1.4726914167404175, "logps/rejected": -23.29397201538086, "loss": 0.6485, "rewards/accuracies": 1.0, "rewards/chosen": 0.7428076863288879, "rewards/margins": 0.3514975607395172, "rewards/rejected": 0.3913101255893707, "step": 834 }, { "epoch": 1.51, "learning_rate": 5.190323980267144e-08, "logits/chosen": -2.2825891971588135, "logits/rejected": -2.283399820327759, "logps/chosen": -1.2151442766189575, "logps/rejected": -3.246025562286377, "loss": 0.6607, "rewards/accuracies": 1.0, "rewards/chosen": 0.6250908374786377, "rewards/margins": 0.10952520370483398, "rewards/rejected": 0.5155656337738037, "step": 835 }, { "epoch": 1.51, "learning_rate": 5.18056813624388e-08, "logits/chosen": -2.3168134689331055, "logits/rejected": -2.3267581462860107, "logps/chosen": -2.3938100337982178, "logps/rejected": -9.156449317932129, "loss": 0.6361, "rewards/accuracies": 1.0, "rewards/chosen": 0.7056485414505005, "rewards/margins": 0.24272757768630981, "rewards/rejected": 0.4629209637641907, "step": 836 }, { "epoch": 1.51, "learning_rate": 5.170811603839996e-08, "logits/chosen": -2.315300941467285, "logits/rejected": -2.319275140762329, "logps/chosen": -2.002483606338501, "logps/rejected": -2.558210849761963, "loss": 0.5769, "rewards/accuracies": 1.0, "rewards/chosen": 0.9844968914985657, "rewards/margins": 0.37846606969833374, "rewards/rejected": 0.6060308218002319, "step": 837 }, { "epoch": 1.52, "learning_rate": 5.1610544202503606e-08, "logits/chosen": -2.3814613819122314, "logits/rejected": -2.3826074600219727, "logps/chosen": -3.465651750564575, "logps/rejected": -4.9565582275390625, "loss": 0.4382, "rewards/accuracies": 1.0, "rewards/chosen": 0.6797143220901489, "rewards/margins": 0.25450289249420166, "rewards/rejected": 0.42521142959594727, "step": 838 }, { "epoch": 1.52, "learning_rate": 5.151296622672322e-08, "logits/chosen": -2.3591880798339844, "logits/rejected": -2.3625540733337402, "logps/chosen": -1.9077637195587158, "logps/rejected": -4.698790550231934, "loss": 0.6854, "rewards/accuracies": 1.0, "rewards/chosen": 0.8365689516067505, "rewards/margins": 0.48228874802589417, "rewards/rejected": 0.3542802035808563, "step": 839 }, { "epoch": 1.52, "learning_rate": 5.1415382483055727e-08, "logits/chosen": -2.2884154319763184, "logits/rejected": -2.292578935623169, "logps/chosen": -5.059239387512207, "logps/rejected": -1.3872448205947876, "loss": 0.5305, "rewards/accuracies": 1.0, "rewards/chosen": 0.5962578058242798, "rewards/margins": 0.028895437717437744, "rewards/rejected": 0.567362368106842, "step": 840 }, { "epoch": 1.52, "learning_rate": 5.131779334351997e-08, "logits/chosen": -2.3458235263824463, "logits/rejected": -2.408158779144287, "logps/chosen": -1.9776428937911987, "logps/rejected": -20.472759246826172, "loss": 0.6301, "rewards/accuracies": 1.0, "rewards/chosen": 0.5630915760993958, "rewards/margins": 0.14669600129127502, "rewards/rejected": 0.4163955748081207, "step": 841 }, { "epoch": 1.52, "learning_rate": 5.122019918015546e-08, "logits/chosen": -2.2274017333984375, "logits/rejected": -2.259852886199951, "logps/chosen": 0.0, "logps/rejected": -1.336710810661316, "loss": 0.8619, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": -0.5413293242454529, "rewards/rejected": 0.5413293242454529, "step": 842 }, { "epoch": 1.52, "learning_rate": 5.1122600365020794e-08, "logits/chosen": -2.2527894973754883, "logits/rejected": -2.2595105171203613, "logps/chosen": -1.4774727821350098, "logps/rejected": -2.4092068672180176, "loss": 0.6106, "rewards/accuracies": 1.0, "rewards/chosen": 0.6849722266197205, "rewards/margins": 0.16386526823043823, "rewards/rejected": 0.5211069583892822, "step": 843 }, { "epoch": 1.53, "learning_rate": 5.1024997270192324e-08, "logits/chosen": -2.2441980838775635, "logits/rejected": -2.227555274963379, "logps/chosen": -12.22037124633789, "logps/rejected": -5.217483997344971, "loss": 0.6042, "rewards/accuracies": 1.0, "rewards/chosen": 0.7004894614219666, "rewards/margins": 0.32772719860076904, "rewards/rejected": 0.3727622628211975, "step": 844 }, { "epoch": 1.53, "learning_rate": 5.092739026776271e-08, "logits/chosen": -2.333284854888916, "logits/rejected": -2.4510931968688965, "logps/chosen": -4.027856826782227, "logps/rejected": -31.11920166015625, "loss": 0.6548, "rewards/accuracies": 1.0, "rewards/chosen": 0.8009605407714844, "rewards/margins": 0.43155401945114136, "rewards/rejected": 0.369406521320343, "step": 845 }, { "epoch": 1.53, "learning_rate": 5.082977972983952e-08, "logits/chosen": -2.410997152328491, "logits/rejected": -2.3570635318756104, "logps/chosen": -19.17654037475586, "logps/rejected": -2.1347479820251465, "loss": 0.6855, "rewards/accuracies": 0.0, "rewards/chosen": 0.5194852948188782, "rewards/margins": -0.09436309337615967, "rewards/rejected": 0.6138483881950378, "step": 846 }, { "epoch": 1.53, "learning_rate": 5.073216602854377e-08, "logits/chosen": -2.405057191848755, "logits/rejected": -2.4078032970428467, "logps/chosen": -1.5778545141220093, "logps/rejected": -2.1319522857666016, "loss": 0.5585, "rewards/accuracies": 1.0, "rewards/chosen": 0.6043747663497925, "rewards/margins": 0.07776379585266113, "rewards/rejected": 0.5266109704971313, "step": 847 }, { "epoch": 1.53, "learning_rate": 5.063454953600857e-08, "logits/chosen": -2.2117087841033936, "logits/rejected": -2.230210542678833, "logps/chosen": -9.011734008789062, "logps/rejected": -11.975178718566895, "loss": 0.6175, "rewards/accuracies": 0.0, "rewards/chosen": 0.26963862776756287, "rewards/margins": -0.1387467384338379, "rewards/rejected": 0.40838536620140076, "step": 848 }, { "epoch": 1.54, "learning_rate": 5.0536930624377706e-08, "logits/chosen": -2.28106689453125, "logits/rejected": -2.2849416732788086, "logps/chosen": -2.8204469680786133, "logps/rejected": -2.132533073425293, "loss": 0.7314, "rewards/accuracies": 0.0, "rewards/chosen": 0.46564531326293945, "rewards/margins": -0.008257120847702026, "rewards/rejected": 0.4739024341106415, "step": 849 }, { "epoch": 1.54, "learning_rate": 5.0439309665804056e-08, "logits/chosen": -2.2632648944854736, "logits/rejected": -2.2630579471588135, "logps/chosen": -4.339328289031982, "logps/rejected": -6.34792423248291, "loss": 0.5315, "rewards/accuracies": 1.0, "rewards/chosen": 0.9533695578575134, "rewards/margins": 0.4504709243774414, "rewards/rejected": 0.502898633480072, "step": 850 }, { "epoch": 1.54, "learning_rate": 5.034168703244847e-08, "logits/chosen": -2.479281187057495, "logits/rejected": -2.4768760204315186, "logps/chosen": -3.4801065921783447, "logps/rejected": -2.5829455852508545, "loss": 0.7311, "rewards/accuracies": 0.0, "rewards/chosen": 0.877507209777832, "rewards/margins": -0.0027634501457214355, "rewards/rejected": 0.8802706599235535, "step": 851 }, { "epoch": 1.54, "learning_rate": 5.0244063096478025e-08, "logits/chosen": -2.1493747234344482, "logits/rejected": -2.156315326690674, "logps/chosen": -0.7247973680496216, "logps/rejected": -3.5565600395202637, "loss": 0.5236, "rewards/accuracies": 1.0, "rewards/chosen": 0.6797199845314026, "rewards/margins": 0.21687278151512146, "rewards/rejected": 0.46284720301628113, "step": 852 }, { "epoch": 1.54, "learning_rate": 5.0146438230064894e-08, "logits/chosen": -2.3508105278015137, "logits/rejected": -2.3545353412628174, "logps/chosen": -2.184922456741333, "logps/rejected": -5.912635803222656, "loss": 0.7703, "rewards/accuracies": 0.0, "rewards/chosen": 0.8084937334060669, "rewards/margins": -0.25980353355407715, "rewards/rejected": 1.068297266960144, "step": 853 }, { "epoch": 1.54, "learning_rate": 5.004881280538471e-08, "logits/chosen": -2.3078317642211914, "logits/rejected": -2.3157765865325928, "logps/chosen": -2.7567760944366455, "logps/rejected": -4.719048500061035, "loss": 0.6501, "rewards/accuracies": 1.0, "rewards/chosen": 0.6101260185241699, "rewards/margins": 0.15030375123023987, "rewards/rejected": 0.45982226729393005, "step": 854 }, { "epoch": 1.55, "learning_rate": 4.995118719461528e-08, "logits/chosen": -2.297023296356201, "logits/rejected": -2.3368778228759766, "logps/chosen": -3.555382013320923, "logps/rejected": -30.696704864501953, "loss": 0.6644, "rewards/accuracies": 1.0, "rewards/chosen": 0.7218658328056335, "rewards/margins": 0.2493499517440796, "rewards/rejected": 0.47251588106155396, "step": 855 }, { "epoch": 1.55, "learning_rate": 4.985356176993511e-08, "logits/chosen": -2.4679501056671143, "logits/rejected": -2.4679501056671143, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 856 }, { "epoch": 1.55, "learning_rate": 4.975593690352197e-08, "logits/chosen": -2.2899715900421143, "logits/rejected": -2.4092187881469727, "logps/chosen": -1.6848770380020142, "logps/rejected": -49.08765411376953, "loss": 0.561, "rewards/accuracies": 1.0, "rewards/chosen": 0.6457362771034241, "rewards/margins": 0.2278548777103424, "rewards/rejected": 0.41788139939308167, "step": 857 }, { "epoch": 1.55, "learning_rate": 4.9658312967551555e-08, "logits/chosen": -2.251349925994873, "logits/rejected": -2.3435800075531006, "logps/chosen": -2.7861533164978027, "logps/rejected": -26.658382415771484, "loss": 0.5645, "rewards/accuracies": 1.0, "rewards/chosen": 0.42293307185173035, "rewards/margins": 0.18975131213665009, "rewards/rejected": 0.23318175971508026, "step": 858 }, { "epoch": 1.55, "learning_rate": 4.956069033419593e-08, "logits/chosen": -2.371250629425049, "logits/rejected": -2.3744661808013916, "logps/chosen": -1.0632925033569336, "logps/rejected": -3.0811657905578613, "loss": 0.5187, "rewards/accuracies": 1.0, "rewards/chosen": 0.7675704956054688, "rewards/margins": 0.283359557390213, "rewards/rejected": 0.48421093821525574, "step": 859 }, { "epoch": 1.56, "learning_rate": 4.94630693756223e-08, "logits/chosen": -2.1739118099212646, "logits/rejected": -2.1769185066223145, "logps/chosen": -4.012244701385498, "logps/rejected": -8.963191032409668, "loss": 0.7213, "rewards/accuracies": 0.0, "rewards/chosen": 0.7582495212554932, "rewards/margins": -0.12281328439712524, "rewards/rejected": 0.8810628056526184, "step": 860 }, { "epoch": 1.56, "learning_rate": 4.9365450463991424e-08, "logits/chosen": -2.321763038635254, "logits/rejected": -2.321239709854126, "logps/chosen": -0.9084007740020752, "logps/rejected": -6.420858383178711, "loss": 0.6681, "rewards/accuracies": 0.0, "rewards/chosen": 0.7796244621276855, "rewards/margins": -0.10566979646682739, "rewards/rejected": 0.8852942585945129, "step": 861 }, { "epoch": 1.56, "learning_rate": 4.9267833971456235e-08, "logits/chosen": -2.385406494140625, "logits/rejected": -2.386080026626587, "logps/chosen": -6.102738380432129, "logps/rejected": -6.055910587310791, "loss": 0.6126, "rewards/accuracies": 1.0, "rewards/chosen": 0.9718852043151855, "rewards/margins": 0.44081586599349976, "rewards/rejected": 0.5310693383216858, "step": 862 }, { "epoch": 1.56, "learning_rate": 4.9170220270160504e-08, "logits/chosen": -2.199566602706909, "logits/rejected": -2.2094552516937256, "logps/chosen": -1.9267668724060059, "logps/rejected": -11.394298553466797, "loss": 0.6388, "rewards/accuracies": 1.0, "rewards/chosen": 0.5843376517295837, "rewards/margins": 0.3445051312446594, "rewards/rejected": 0.23983250558376312, "step": 863 }, { "epoch": 1.56, "learning_rate": 4.907260973223729e-08, "logits/chosen": -2.274062156677246, "logits/rejected": -2.2763919830322266, "logps/chosen": -1.7706080675125122, "logps/rejected": -5.197568893432617, "loss": 0.5548, "rewards/accuracies": 1.0, "rewards/chosen": 0.6760085225105286, "rewards/margins": 0.46379613876342773, "rewards/rejected": 0.21221236884593964, "step": 864 }, { "epoch": 1.56, "learning_rate": 4.897500272980768e-08, "logits/chosen": -2.2904183864593506, "logits/rejected": -2.2962467670440674, "logps/chosen": -1.0037447214126587, "logps/rejected": -2.2816009521484375, "loss": 0.5516, "rewards/accuracies": 1.0, "rewards/chosen": 0.5857567191123962, "rewards/margins": 0.15369123220443726, "rewards/rejected": 0.432065486907959, "step": 865 }, { "epoch": 1.57, "learning_rate": 4.887739963497921e-08, "logits/chosen": -2.2765142917633057, "logits/rejected": -2.302151918411255, "logps/chosen": 0.0, "logps/rejected": -0.9590244293212891, "loss": 0.7965, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": -0.5509170293807983, "rewards/rejected": 0.5509170293807983, "step": 866 }, { "epoch": 1.57, "learning_rate": 4.8779800819844546e-08, "logits/chosen": -2.4542019367218018, "logits/rejected": -2.444849967956543, "logps/chosen": -4.988303184509277, "logps/rejected": -3.717709541320801, "loss": 0.6943, "rewards/accuracies": 1.0, "rewards/chosen": 1.0180672407150269, "rewards/margins": 0.16816353797912598, "rewards/rejected": 0.8499037027359009, "step": 867 }, { "epoch": 1.57, "learning_rate": 4.8682206656480026e-08, "logits/chosen": -2.4059906005859375, "logits/rejected": -2.407627582550049, "logps/chosen": -2.370722770690918, "logps/rejected": -2.1757984161376953, "loss": 0.609, "rewards/accuracies": 1.0, "rewards/chosen": 0.7496066093444824, "rewards/margins": 0.03126579523086548, "rewards/rejected": 0.7183408141136169, "step": 868 }, { "epoch": 1.57, "learning_rate": 4.8584617516944275e-08, "logits/chosen": -2.291025161743164, "logits/rejected": -2.3670527935028076, "logps/chosen": -2.0273094177246094, "logps/rejected": -24.420352935791016, "loss": 0.6588, "rewards/accuracies": 0.0, "rewards/chosen": 0.5661432147026062, "rewards/margins": -0.21410775184631348, "rewards/rejected": 0.7802509665489197, "step": 869 }, { "epoch": 1.57, "learning_rate": 4.848703377327678e-08, "logits/chosen": -2.3524820804595947, "logits/rejected": -2.4674789905548096, "logps/chosen": -1.8438633680343628, "logps/rejected": -28.098384857177734, "loss": 0.6124, "rewards/accuracies": 1.0, "rewards/chosen": 0.6272608041763306, "rewards/margins": 0.3798999786376953, "rewards/rejected": 0.24736081063747406, "step": 870 }, { "epoch": 1.58, "learning_rate": 4.83894557974964e-08, "logits/chosen": -2.2761645317077637, "logits/rejected": -2.2792551517486572, "logps/chosen": -2.5005557537078857, "logps/rejected": -7.12923526763916, "loss": 0.586, "rewards/accuracies": 1.0, "rewards/chosen": 0.6531476974487305, "rewards/margins": 0.13408327102661133, "rewards/rejected": 0.5190644264221191, "step": 871 }, { "epoch": 1.58, "learning_rate": 4.8291883961600035e-08, "logits/chosen": -2.3632380962371826, "logits/rejected": -2.3686535358428955, "logps/chosen": -1.5890811681747437, "logps/rejected": -2.0299384593963623, "loss": 0.6906, "rewards/accuracies": 1.0, "rewards/chosen": 0.6880625486373901, "rewards/margins": 0.17291730642318726, "rewards/rejected": 0.5151452422142029, "step": 872 }, { "epoch": 1.58, "learning_rate": 4.81943186375612e-08, "logits/chosen": -2.3356058597564697, "logits/rejected": -2.3422281742095947, "logps/chosen": -3.138089656829834, "logps/rejected": -2.2966532707214355, "loss": 0.6334, "rewards/accuracies": 0.0, "rewards/chosen": 0.651248037815094, "rewards/margins": -0.06037473678588867, "rewards/rejected": 0.7116227746009827, "step": 873 }, { "epoch": 1.58, "learning_rate": 4.809676019732857e-08, "logits/chosen": -2.3460333347320557, "logits/rejected": -2.3597464561462402, "logps/chosen": -1.4647595882415771, "logps/rejected": -4.544454574584961, "loss": 0.5628, "rewards/accuracies": 1.0, "rewards/chosen": 0.9013635516166687, "rewards/margins": 0.2232324481010437, "rewards/rejected": 0.678131103515625, "step": 874 }, { "epoch": 1.58, "learning_rate": 4.7999209012824544e-08, "logits/chosen": -2.3604817390441895, "logits/rejected": -2.3640503883361816, "logps/chosen": -1.0328905582427979, "logps/rejected": -4.945307731628418, "loss": 0.7181, "rewards/accuracies": 1.0, "rewards/chosen": 0.6861802339553833, "rewards/margins": 0.22323039174079895, "rewards/rejected": 0.46294984221458435, "step": 875 }, { "epoch": 1.58, "learning_rate": 4.7901665455943916e-08, "logits/chosen": -2.3102264404296875, "logits/rejected": -2.3113114833831787, "logps/chosen": -1.3507964611053467, "logps/rejected": -1.4366521835327148, "loss": 0.6261, "rewards/accuracies": 1.0, "rewards/chosen": 0.7421127557754517, "rewards/margins": 0.21520572900772095, "rewards/rejected": 0.5269070267677307, "step": 876 }, { "epoch": 1.59, "learning_rate": 4.780412989855234e-08, "logits/chosen": -2.2260689735412598, "logits/rejected": -2.2353100776672363, "logps/chosen": -1.8632408380508423, "logps/rejected": -3.6964120864868164, "loss": 0.7449, "rewards/accuracies": 0.0, "rewards/chosen": 0.5343981385231018, "rewards/margins": -0.1857283115386963, "rewards/rejected": 0.7201264500617981, "step": 877 }, { "epoch": 1.59, "learning_rate": 4.7706602712485036e-08, "logits/chosen": -2.2593140602111816, "logits/rejected": -2.3997132778167725, "logps/chosen": -1.7029436826705933, "logps/rejected": -33.38299560546875, "loss": 0.5975, "rewards/accuracies": 1.0, "rewards/chosen": 0.7938753366470337, "rewards/margins": 0.13099539279937744, "rewards/rejected": 0.6628799438476562, "step": 878 }, { "epoch": 1.59, "learning_rate": 4.760908426954529e-08, "logits/chosen": -2.3273770809173584, "logits/rejected": -2.2960081100463867, "logps/chosen": -1.4211148023605347, "logps/rejected": 0.0, "loss": 0.4992, "rewards/accuracies": 1.0, "rewards/chosen": 0.6970698237419128, "rewards/margins": 0.6970698237419128, "rewards/rejected": 0.0, "step": 879 }, { "epoch": 1.59, "learning_rate": 4.751157494150303e-08, "logits/chosen": -2.2737045288085938, "logits/rejected": -2.2838006019592285, "logps/chosen": -2.001279354095459, "logps/rejected": -2.253303050994873, "loss": 0.6551, "rewards/accuracies": 1.0, "rewards/chosen": 0.6528273224830627, "rewards/margins": 0.23274469375610352, "rewards/rejected": 0.42008262872695923, "step": 880 }, { "epoch": 1.59, "learning_rate": 4.7414075100093424e-08, "logits/chosen": -2.370978593826294, "logits/rejected": -2.3606116771698, "logps/chosen": -0.8152732253074646, "logps/rejected": -14.982634544372559, "loss": 0.6521, "rewards/accuracies": 1.0, "rewards/chosen": 0.5213002562522888, "rewards/margins": 0.5319046378135681, "rewards/rejected": -0.010604381561279297, "step": 881 }, { "epoch": 1.59, "learning_rate": 4.731658511701555e-08, "logits/chosen": -2.4127602577209473, "logits/rejected": -2.4083213806152344, "logps/chosen": -1.2879831790924072, "logps/rejected": -1.9318865537643433, "loss": 0.6716, "rewards/accuracies": 0.0, "rewards/chosen": 0.4386022686958313, "rewards/margins": -0.1480376124382019, "rewards/rejected": 0.5866398811340332, "step": 882 }, { "epoch": 1.6, "learning_rate": 4.721910536393085e-08, "logits/chosen": -2.3224120140075684, "logits/rejected": -2.338674545288086, "logps/chosen": -1.2698475122451782, "logps/rejected": -12.111763000488281, "loss": 0.6337, "rewards/accuracies": 1.0, "rewards/chosen": 0.5175581574440002, "rewards/margins": 0.256603866815567, "rewards/rejected": 0.2609542906284332, "step": 883 }, { "epoch": 1.6, "learning_rate": 4.712163621246174e-08, "logits/chosen": -2.2581140995025635, "logits/rejected": -2.2540011405944824, "logps/chosen": -8.26362419128418, "logps/rejected": -1.6608567237854004, "loss": 0.7899, "rewards/accuracies": 0.0, "rewards/chosen": 0.2912571132183075, "rewards/margins": -0.1867603063583374, "rewards/rejected": 0.4780174195766449, "step": 884 }, { "epoch": 1.6, "learning_rate": 4.702417803419026e-08, "logits/chosen": -2.4795937538146973, "logits/rejected": -2.4791033267974854, "logps/chosen": -2.0775747299194336, "logps/rejected": -11.264190673828125, "loss": 0.4511, "rewards/accuracies": 1.0, "rewards/chosen": 1.0452321767807007, "rewards/margins": 0.7922999858856201, "rewards/rejected": 0.2529321610927582, "step": 885 }, { "epoch": 1.6, "learning_rate": 4.6926731200656643e-08, "logits/chosen": -2.3469574451446533, "logits/rejected": -2.347891092300415, "logps/chosen": -3.2335333824157715, "logps/rejected": -3.9123919010162354, "loss": 0.7066, "rewards/accuracies": 1.0, "rewards/chosen": 0.6429612040519714, "rewards/margins": 0.14993330836296082, "rewards/rejected": 0.4930278956890106, "step": 886 }, { "epoch": 1.6, "learning_rate": 4.68292960833578e-08, "logits/chosen": -2.2561190128326416, "logits/rejected": -2.255244016647339, "logps/chosen": -1.4998517036437988, "logps/rejected": -5.464852333068848, "loss": 0.6547, "rewards/accuracies": 1.0, "rewards/chosen": 0.7254165410995483, "rewards/margins": 0.3467026352882385, "rewards/rejected": 0.3787139058113098, "step": 887 }, { "epoch": 1.61, "learning_rate": 4.673187305374604e-08, "logits/chosen": -2.336237668991089, "logits/rejected": -2.312180280685425, "logps/chosen": -3.283519744873047, "logps/rejected": 0.0, "loss": 0.5135, "rewards/accuracies": 1.0, "rewards/chosen": 0.9860571026802063, "rewards/margins": 0.9860571026802063, "rewards/rejected": 0.0, "step": 888 }, { "epoch": 1.61, "learning_rate": 4.663446248322755e-08, "logits/chosen": -2.292149543762207, "logits/rejected": -2.273653745651245, "logps/chosen": -3.77614426612854, "logps/rejected": -9.527155876159668, "loss": 0.4204, "rewards/accuracies": 1.0, "rewards/chosen": 1.0476734638214111, "rewards/margins": 0.6392695903778076, "rewards/rejected": 0.4084038734436035, "step": 889 }, { "epoch": 1.61, "learning_rate": 4.6537064743161015e-08, "logits/chosen": -2.3678274154663086, "logits/rejected": -2.3731250762939453, "logps/chosen": -2.8228378295898438, "logps/rejected": -4.604382038116455, "loss": 0.5391, "rewards/accuracies": 1.0, "rewards/chosen": 0.795244038105011, "rewards/margins": 0.07613760232925415, "rewards/rejected": 0.7191064357757568, "step": 890 }, { "epoch": 1.61, "learning_rate": 4.643968020485624e-08, "logits/chosen": -2.3436172008514404, "logits/rejected": -2.3151121139526367, "logps/chosen": -1.0604737997055054, "logps/rejected": 0.0, "loss": 0.481, "rewards/accuracies": 1.0, "rewards/chosen": 0.6866855621337891, "rewards/margins": 0.6866855621337891, "rewards/rejected": 0.0, "step": 891 }, { "epoch": 1.61, "learning_rate": 4.6342309239572716e-08, "logits/chosen": -2.437816858291626, "logits/rejected": -2.4965343475341797, "logps/chosen": -1.741945505142212, "logps/rejected": -32.37261962890625, "loss": 0.6574, "rewards/accuracies": 0.0, "rewards/chosen": 0.8010124564170837, "rewards/margins": -0.08661001920700073, "rewards/rejected": 0.8876224756240845, "step": 892 }, { "epoch": 1.61, "learning_rate": 4.6244952218518115e-08, "logits/chosen": -2.192194938659668, "logits/rejected": -2.1913881301879883, "logps/chosen": -0.9740637540817261, "logps/rejected": -2.201078414916992, "loss": 0.6504, "rewards/accuracies": 1.0, "rewards/chosen": 0.6177470088005066, "rewards/margins": 0.1787731945514679, "rewards/rejected": 0.4389738142490387, "step": 893 }, { "epoch": 1.62, "learning_rate": 4.614760951284699e-08, "logits/chosen": -2.3244943618774414, "logits/rejected": -2.3245904445648193, "logps/chosen": -1.8482410907745361, "logps/rejected": -2.6382577419281006, "loss": 0.6938, "rewards/accuracies": 0.0, "rewards/chosen": 0.5096043944358826, "rewards/margins": -0.17097383737564087, "rewards/rejected": 0.6805782318115234, "step": 894 }, { "epoch": 1.62, "learning_rate": 4.605028149365937e-08, "logits/chosen": -2.1805646419525146, "logits/rejected": -2.1760802268981934, "logps/chosen": -1.0790122747421265, "logps/rejected": -3.305978775024414, "loss": 0.6444, "rewards/accuracies": 1.0, "rewards/chosen": 0.6386412382125854, "rewards/margins": 0.15006405115127563, "rewards/rejected": 0.4885771870613098, "step": 895 }, { "epoch": 1.62, "learning_rate": 4.595296853199922e-08, "logits/chosen": -2.312329053878784, "logits/rejected": -2.313230514526367, "logps/chosen": -3.3365464210510254, "logps/rejected": -3.4156622886657715, "loss": 0.7191, "rewards/accuracies": 0.0, "rewards/chosen": 0.867893636226654, "rewards/margins": -0.03636360168457031, "rewards/rejected": 0.9042572379112244, "step": 896 }, { "epoch": 1.62, "learning_rate": 4.5855670998853106e-08, "logits/chosen": -2.3904378414154053, "logits/rejected": -2.367187261581421, "logps/chosen": -7.228086471557617, "logps/rejected": -8.116750717163086, "loss": 0.6188, "rewards/accuracies": 1.0, "rewards/chosen": 0.9997159838676453, "rewards/margins": 0.3236333727836609, "rewards/rejected": 0.6760826110839844, "step": 897 }, { "epoch": 1.62, "learning_rate": 4.575838926514881e-08, "logits/chosen": -2.3056466579437256, "logits/rejected": -2.3106133937835693, "logps/chosen": -2.0330851078033447, "logps/rejected": -0.8600963354110718, "loss": 0.5578, "rewards/accuracies": 0.0, "rewards/chosen": 0.6755993962287903, "rewards/margins": -0.19808423519134521, "rewards/rejected": 0.8736836314201355, "step": 898 }, { "epoch": 1.63, "learning_rate": 4.56611237017539e-08, "logits/chosen": -2.379455089569092, "logits/rejected": -2.380476236343384, "logps/chosen": -1.49208402633667, "logps/rejected": -2.5799615383148193, "loss": 0.6658, "rewards/accuracies": 1.0, "rewards/chosen": 0.5730605721473694, "rewards/margins": 0.008504748344421387, "rewards/rejected": 0.564555823802948, "step": 899 }, { "epoch": 1.63, "learning_rate": 4.556387467947424e-08, "logits/chosen": -2.2386484146118164, "logits/rejected": -2.2314746379852295, "logps/chosen": -1.049118995666504, "logps/rejected": -5.430126667022705, "loss": 0.6987, "rewards/accuracies": 0.0, "rewards/chosen": 0.5042387843132019, "rewards/margins": -0.009519338607788086, "rewards/rejected": 0.51375812292099, "step": 900 }, { "epoch": 1.63, "learning_rate": 4.546664256905268e-08, "logits/chosen": -2.156026601791382, "logits/rejected": -2.156728744506836, "logps/chosen": -3.393541097640991, "logps/rejected": -1.6268887519836426, "loss": 0.5871, "rewards/accuracies": 1.0, "rewards/chosen": 0.6791216731071472, "rewards/margins": 0.11473733186721802, "rewards/rejected": 0.5643843412399292, "step": 901 }, { "epoch": 1.63, "learning_rate": 4.5369427741167535e-08, "logits/chosen": -2.4191641807556152, "logits/rejected": -2.401343822479248, "logps/chosen": -8.366863250732422, "logps/rejected": 0.0, "loss": 0.4956, "rewards/accuracies": 1.0, "rewards/chosen": 0.7014590501785278, "rewards/margins": 0.7014590501785278, "rewards/rejected": 0.0, "step": 902 }, { "epoch": 1.63, "learning_rate": 4.5272230566431324e-08, "logits/chosen": -2.265411138534546, "logits/rejected": -2.267029285430908, "logps/chosen": -1.7915782928466797, "logps/rejected": -2.3455560207366943, "loss": 0.725, "rewards/accuracies": 0.0, "rewards/chosen": 0.5473960638046265, "rewards/margins": -0.1860293745994568, "rewards/rejected": 0.7334254384040833, "step": 903 }, { "epoch": 1.63, "learning_rate": 4.517505141538922e-08, "logits/chosen": -2.172539710998535, "logits/rejected": -2.14658522605896, "logps/chosen": -6.388941764831543, "logps/rejected": 0.0, "loss": 0.5006, "rewards/accuracies": 1.0, "rewards/chosen": 0.9488690495491028, "rewards/margins": 0.9488690495491028, "rewards/rejected": 0.0, "step": 904 }, { "epoch": 1.64, "learning_rate": 4.5077890658517665e-08, "logits/chosen": -2.3746938705444336, "logits/rejected": -2.362074136734009, "logps/chosen": -1.8182662725448608, "logps/rejected": -9.272358894348145, "loss": 0.5335, "rewards/accuracies": 1.0, "rewards/chosen": 0.6181048154830933, "rewards/margins": 0.462838351726532, "rewards/rejected": 0.15526647865772247, "step": 905 }, { "epoch": 1.64, "learning_rate": 4.498074866622299e-08, "logits/chosen": -2.406569480895996, "logits/rejected": -2.4037599563598633, "logps/chosen": -2.3916666507720947, "logps/rejected": -6.498311996459961, "loss": 0.6413, "rewards/accuracies": 0.0, "rewards/chosen": 0.7603950500488281, "rewards/margins": -0.271889328956604, "rewards/rejected": 1.0322843790054321, "step": 906 }, { "epoch": 1.64, "learning_rate": 4.4883625808839996e-08, "logits/chosen": -2.4094111919403076, "logits/rejected": -2.408863067626953, "logps/chosen": -1.7441394329071045, "logps/rejected": -3.474024534225464, "loss": 0.6245, "rewards/accuracies": 1.0, "rewards/chosen": 0.9682488441467285, "rewards/margins": 0.3754883408546448, "rewards/rejected": 0.5927605032920837, "step": 907 }, { "epoch": 1.64, "learning_rate": 4.4786522456630574e-08, "logits/chosen": -2.312187910079956, "logits/rejected": -2.3120715618133545, "logps/chosen": -3.9592368602752686, "logps/rejected": -2.367579936981201, "loss": 0.5726, "rewards/accuracies": 1.0, "rewards/chosen": 0.8093123435974121, "rewards/margins": 0.3700827658176422, "rewards/rejected": 0.4392295777797699, "step": 908 }, { "epoch": 1.64, "learning_rate": 4.468943897978217e-08, "logits/chosen": -2.331167221069336, "logits/rejected": -2.3895180225372314, "logps/chosen": -1.7527230978012085, "logps/rejected": -28.262569427490234, "loss": 0.6802, "rewards/accuracies": 1.0, "rewards/chosen": 0.7499884366989136, "rewards/margins": 0.3318198025226593, "rewards/rejected": 0.4181686341762543, "step": 909 }, { "epoch": 1.65, "learning_rate": 4.459237574840655e-08, "logits/chosen": -2.2269773483276367, "logits/rejected": -2.232166290283203, "logps/chosen": -2.491379499435425, "logps/rejected": -9.53288745880127, "loss": 0.7753, "rewards/accuracies": 0.0, "rewards/chosen": 0.5395984649658203, "rewards/margins": -0.3091300129890442, "rewards/rejected": 0.8487284779548645, "step": 910 }, { "epoch": 1.65, "learning_rate": 4.44953331325382e-08, "logits/chosen": -2.31923508644104, "logits/rejected": -2.2927427291870117, "logps/chosen": -0.9982501268386841, "logps/rejected": 0.0, "loss": 0.5208, "rewards/accuracies": 1.0, "rewards/chosen": 0.5748960375785828, "rewards/margins": 0.5748960375785828, "rewards/rejected": 0.0, "step": 911 }, { "epoch": 1.65, "learning_rate": 4.439831150213311e-08, "logits/chosen": -2.3762402534484863, "logits/rejected": -2.3992905616760254, "logps/chosen": 0.0, "logps/rejected": -3.7205989360809326, "loss": 0.8689, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": -0.5691485404968262, "rewards/rejected": 0.5691485404968262, "step": 912 }, { "epoch": 1.65, "learning_rate": 4.4301311227067236e-08, "logits/chosen": -2.3199102878570557, "logits/rejected": -2.294299602508545, "logps/chosen": -1.8286880254745483, "logps/rejected": 0.0, "loss": 0.4641, "rewards/accuracies": 1.0, "rewards/chosen": 0.60589998960495, "rewards/margins": 0.60589998960495, "rewards/rejected": 0.0, "step": 913 }, { "epoch": 1.65, "learning_rate": 4.4204332677135075e-08, "logits/chosen": -2.4264776706695557, "logits/rejected": -2.4282608032226562, "logps/chosen": -2.0138697624206543, "logps/rejected": -3.1677286624908447, "loss": 0.7715, "rewards/accuracies": 1.0, "rewards/chosen": 0.8647106289863586, "rewards/margins": 0.27782851457595825, "rewards/rejected": 0.5868821144104004, "step": 914 }, { "epoch": 1.65, "learning_rate": 4.410737622204833e-08, "logits/chosen": -2.4940247535705566, "logits/rejected": -2.4567360877990723, "logps/chosen": -26.008337020874023, "logps/rejected": -35.03994369506836, "loss": 0.4916, "rewards/accuracies": 1.0, "rewards/chosen": 0.8698358535766602, "rewards/margins": 0.8399030566215515, "rewards/rejected": 0.029932785779237747, "step": 915 }, { "epoch": 1.66, "learning_rate": 4.401044223143451e-08, "logits/chosen": -2.1797983646392822, "logits/rejected": -2.1852097511291504, "logps/chosen": -1.385155439376831, "logps/rejected": -7.205033779144287, "loss": 0.4448, "rewards/accuracies": 1.0, "rewards/chosen": 0.50332111120224, "rewards/margins": 0.3316933512687683, "rewards/rejected": 0.17162775993347168, "step": 916 }, { "epoch": 1.66, "learning_rate": 4.391353107483545e-08, "logits/chosen": -2.352128028869629, "logits/rejected": -2.3743765354156494, "logps/chosen": -8.24459171295166, "logps/rejected": -7.88166618347168, "loss": 0.6804, "rewards/accuracies": 0.0, "rewards/chosen": 0.6789461970329285, "rewards/margins": -0.00847858190536499, "rewards/rejected": 0.6874247789382935, "step": 917 }, { "epoch": 1.66, "learning_rate": 4.381664312170591e-08, "logits/chosen": -2.3681514263153076, "logits/rejected": -2.3589911460876465, "logps/chosen": -1.1728817224502563, "logps/rejected": -4.075314521789551, "loss": 0.4943, "rewards/accuracies": 1.0, "rewards/chosen": 0.7994806170463562, "rewards/margins": 0.5147106647491455, "rewards/rejected": 0.2847699820995331, "step": 918 }, { "epoch": 1.66, "learning_rate": 4.371977874141224e-08, "logits/chosen": -2.2619144916534424, "logits/rejected": -2.3117666244506836, "logps/chosen": -1.9670310020446777, "logps/rejected": -25.6737117767334, "loss": 0.4979, "rewards/accuracies": 1.0, "rewards/chosen": 0.5398885011672974, "rewards/margins": 0.6677905321121216, "rewards/rejected": -0.12790203094482422, "step": 919 }, { "epoch": 1.66, "learning_rate": 4.3622938303230866e-08, "logits/chosen": -2.230534076690674, "logits/rejected": -2.2374162673950195, "logps/chosen": -3.6143150329589844, "logps/rejected": -11.193288803100586, "loss": 0.6212, "rewards/accuracies": 1.0, "rewards/chosen": 0.8495987057685852, "rewards/margins": 0.5350888967514038, "rewards/rejected": 0.314509779214859, "step": 920 }, { "epoch": 1.67, "learning_rate": 4.352612217634702e-08, "logits/chosen": -2.362966299057007, "logits/rejected": -2.4611239433288574, "logps/chosen": -1.0590929985046387, "logps/rejected": -37.92021560668945, "loss": 0.7057, "rewards/accuracies": 0.0, "rewards/chosen": 0.5614277720451355, "rewards/margins": -0.28422999382019043, "rewards/rejected": 0.8456577658653259, "step": 921 }, { "epoch": 1.67, "learning_rate": 4.342933072985316e-08, "logits/chosen": -2.2988839149475098, "logits/rejected": -2.3001508712768555, "logps/chosen": -0.8038504719734192, "logps/rejected": -6.632925510406494, "loss": 0.5386, "rewards/accuracies": 1.0, "rewards/chosen": 0.47755637764930725, "rewards/margins": 0.10579472780227661, "rewards/rejected": 0.37176164984703064, "step": 922 }, { "epoch": 1.67, "learning_rate": 4.333256433274774e-08, "logits/chosen": -2.282489776611328, "logits/rejected": -2.3472073078155518, "logps/chosen": -1.5914955139160156, "logps/rejected": -27.67327117919922, "loss": 0.471, "rewards/accuracies": 1.0, "rewards/chosen": 0.5701009631156921, "rewards/margins": 0.28333356976509094, "rewards/rejected": 0.2867673933506012, "step": 923 }, { "epoch": 1.67, "learning_rate": 4.323582335393364e-08, "logits/chosen": -2.3145155906677246, "logits/rejected": -2.314258575439453, "logps/chosen": -1.5681421756744385, "logps/rejected": -1.520132303237915, "loss": 0.6247, "rewards/accuracies": 1.0, "rewards/chosen": 0.7574604153633118, "rewards/margins": 0.061334311962127686, "rewards/rejected": 0.6961261034011841, "step": 924 }, { "epoch": 1.67, "learning_rate": 4.313910816221691e-08, "logits/chosen": -2.399170398712158, "logits/rejected": -2.4021997451782227, "logps/chosen": -3.8274145126342773, "logps/rejected": -2.5711727142333984, "loss": 0.7502, "rewards/accuracies": 0.0, "rewards/chosen": 0.6880654692649841, "rewards/margins": -0.022966444492340088, "rewards/rejected": 0.7110319137573242, "step": 925 }, { "epoch": 1.67, "learning_rate": 4.304241912630524e-08, "logits/chosen": -2.420029878616333, "logits/rejected": -2.4277327060699463, "logps/chosen": -3.724580764770508, "logps/rejected": -10.055028915405273, "loss": 0.514, "rewards/accuracies": 1.0, "rewards/chosen": 0.6009446382522583, "rewards/margins": 0.7317519187927246, "rewards/rejected": -0.1308073103427887, "step": 926 }, { "epoch": 1.68, "learning_rate": 4.2945756614806624e-08, "logits/chosen": -2.2648403644561768, "logits/rejected": -2.273247718811035, "logps/chosen": -1.2946228981018066, "logps/rejected": -3.029966354370117, "loss": 0.6335, "rewards/accuracies": 1.0, "rewards/chosen": 0.6960119009017944, "rewards/margins": 0.21400558948516846, "rewards/rejected": 0.482006311416626, "step": 927 }, { "epoch": 1.68, "learning_rate": 4.284912099622792e-08, "logits/chosen": -2.4187517166137695, "logits/rejected": -2.4122235774993896, "logps/chosen": -0.6902022361755371, "logps/rejected": -7.023197650909424, "loss": 0.6084, "rewards/accuracies": 1.0, "rewards/chosen": 0.46098968386650085, "rewards/margins": 0.12103581428527832, "rewards/rejected": 0.33995386958122253, "step": 928 }, { "epoch": 1.68, "learning_rate": 4.275251263897353e-08, "logits/chosen": -2.3463082313537598, "logits/rejected": -2.3432531356811523, "logps/chosen": -3.222580909729004, "logps/rejected": -4.674016952514648, "loss": 0.6778, "rewards/accuracies": 1.0, "rewards/chosen": 0.7800177931785583, "rewards/margins": 0.1147645115852356, "rewards/rejected": 0.6652532815933228, "step": 929 }, { "epoch": 1.68, "learning_rate": 4.2655931911343834e-08, "logits/chosen": -2.2392494678497314, "logits/rejected": -2.232095718383789, "logps/chosen": -2.0969953536987305, "logps/rejected": -9.876237869262695, "loss": 0.6587, "rewards/accuracies": 0.0, "rewards/chosen": 0.6038495898246765, "rewards/margins": -0.14438748359680176, "rewards/rejected": 0.7482370734214783, "step": 930 }, { "epoch": 1.68, "learning_rate": 4.255937918153392e-08, "logits/chosen": -2.4189698696136475, "logits/rejected": -2.4227936267852783, "logps/chosen": -1.8451184034347534, "logps/rejected": -10.901460647583008, "loss": 0.7869, "rewards/accuracies": 0.0, "rewards/chosen": 0.8689713478088379, "rewards/margins": -0.2445991039276123, "rewards/rejected": 1.1135704517364502, "step": 931 }, { "epoch": 1.69, "learning_rate": 4.246285481763216e-08, "logits/chosen": -2.3062970638275146, "logits/rejected": -2.376774549484253, "logps/chosen": -2.2904834747314453, "logps/rejected": -29.82693099975586, "loss": 0.6123, "rewards/accuracies": 1.0, "rewards/chosen": 0.6715086102485657, "rewards/margins": 0.2821989059448242, "rewards/rejected": 0.38930970430374146, "step": 932 }, { "epoch": 1.69, "learning_rate": 4.236635918761875e-08, "logits/chosen": -2.2962028980255127, "logits/rejected": -2.2967851161956787, "logps/chosen": -3.945765256881714, "logps/rejected": -2.790572166442871, "loss": 0.6439, "rewards/accuracies": 0.0, "rewards/chosen": 0.7026749849319458, "rewards/margins": -0.08001899719238281, "rewards/rejected": 0.7826939821243286, "step": 933 }, { "epoch": 1.69, "learning_rate": 4.226989265936437e-08, "logits/chosen": -2.4592409133911133, "logits/rejected": -2.464606761932373, "logps/chosen": -2.5523593425750732, "logps/rejected": -1.6098700761795044, "loss": 0.5905, "rewards/accuracies": 1.0, "rewards/chosen": 0.7224059104919434, "rewards/margins": 0.11929118633270264, "rewards/rejected": 0.6031147241592407, "step": 934 }, { "epoch": 1.69, "learning_rate": 4.2173455600628766e-08, "logits/chosen": -2.294536590576172, "logits/rejected": -2.2883834838867188, "logps/chosen": -2.166477918624878, "logps/rejected": -1.9172730445861816, "loss": 0.5603, "rewards/accuracies": 1.0, "rewards/chosen": 0.8234810829162598, "rewards/margins": 0.09796661138534546, "rewards/rejected": 0.7255144715309143, "step": 935 }, { "epoch": 1.69, "learning_rate": 4.207704837905929e-08, "logits/chosen": -2.272801399230957, "logits/rejected": -2.2686617374420166, "logps/chosen": -2.1846585273742676, "logps/rejected": -2.1275503635406494, "loss": 0.7691, "rewards/accuracies": 0.0, "rewards/chosen": 0.5313645005226135, "rewards/margins": -0.07046771049499512, "rewards/rejected": 0.6018322110176086, "step": 936 }, { "epoch": 1.69, "learning_rate": 4.198067136218957e-08, "logits/chosen": -2.3800249099731445, "logits/rejected": -2.4066691398620605, "logps/chosen": 0.0, "logps/rejected": -2.6307425498962402, "loss": 0.9747, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": -0.8712539672851562, "rewards/rejected": 0.8712539672851562, "step": 937 }, { "epoch": 1.7, "learning_rate": 4.1884324917438124e-08, "logits/chosen": -2.391200542449951, "logits/rejected": -2.396317958831787, "logps/chosen": -1.8201779127120972, "logps/rejected": -4.46785831451416, "loss": 0.5952, "rewards/accuracies": 1.0, "rewards/chosen": 0.7230674624443054, "rewards/margins": 0.2988682687282562, "rewards/rejected": 0.4241991937160492, "step": 938 }, { "epoch": 1.7, "learning_rate": 4.1788009412106863e-08, "logits/chosen": -2.2326929569244385, "logits/rejected": -2.2346994876861572, "logps/chosen": -5.102174758911133, "logps/rejected": -7.169028282165527, "loss": 0.7024, "rewards/accuracies": 1.0, "rewards/chosen": 0.5416576266288757, "rewards/margins": 0.028871119022369385, "rewards/rejected": 0.5127865076065063, "step": 939 }, { "epoch": 1.7, "learning_rate": 4.169172521337976e-08, "logits/chosen": -2.369467258453369, "logits/rejected": -2.3709640502929688, "logps/chosen": -2.477283000946045, "logps/rejected": -9.652547836303711, "loss": 0.6384, "rewards/accuracies": 0.0, "rewards/chosen": 0.5466691851615906, "rewards/margins": -0.2750593423843384, "rewards/rejected": 0.821728527545929, "step": 940 }, { "epoch": 1.7, "learning_rate": 4.1595472688321434e-08, "logits/chosen": -2.222525119781494, "logits/rejected": -2.223783254623413, "logps/chosen": -1.9136178493499756, "logps/rejected": -3.470906972885132, "loss": 0.6005, "rewards/accuracies": 1.0, "rewards/chosen": 0.7822101712226868, "rewards/margins": 0.41300055384635925, "rewards/rejected": 0.3692096173763275, "step": 941 }, { "epoch": 1.7, "learning_rate": 4.1499252203875815e-08, "logits/chosen": -2.2998836040496826, "logits/rejected": -2.298027515411377, "logps/chosen": -0.8083322644233704, "logps/rejected": -4.543056011199951, "loss": 0.6867, "rewards/accuracies": 1.0, "rewards/chosen": 0.3949717879295349, "rewards/margins": 0.10283863544464111, "rewards/rejected": 0.2921331524848938, "step": 942 }, { "epoch": 1.71, "learning_rate": 4.1403064126864595e-08, "logits/chosen": -2.3942272663116455, "logits/rejected": -2.390688896179199, "logps/chosen": -8.598329544067383, "logps/rejected": -4.949061393737793, "loss": 0.6362, "rewards/accuracies": 1.0, "rewards/chosen": 0.940817654132843, "rewards/margins": 0.4734671711921692, "rewards/rejected": 0.46735048294067383, "step": 943 }, { "epoch": 1.71, "learning_rate": 4.1306908823985986e-08, "logits/chosen": -2.167677879333496, "logits/rejected": -2.169863700866699, "logps/chosen": -0.6896044015884399, "logps/rejected": -6.506817817687988, "loss": 0.71, "rewards/accuracies": 0.0, "rewards/chosen": 0.509990394115448, "rewards/margins": -0.16201651096343994, "rewards/rejected": 0.6720069050788879, "step": 944 }, { "epoch": 1.71, "learning_rate": 4.1210786661813214e-08, "logits/chosen": -2.212108850479126, "logits/rejected": -2.2250771522521973, "logps/chosen": -3.4043049812316895, "logps/rejected": -15.574398040771484, "loss": 0.568, "rewards/accuracies": 1.0, "rewards/chosen": 0.6716113686561584, "rewards/margins": 0.36167770624160767, "rewards/rejected": 0.3099336624145508, "step": 945 }, { "epoch": 1.71, "learning_rate": 4.1114698006793156e-08, "logits/chosen": -2.355241537094116, "logits/rejected": -2.3562417030334473, "logps/chosen": -2.585221529006958, "logps/rejected": -5.414143085479736, "loss": 0.5904, "rewards/accuracies": 1.0, "rewards/chosen": 0.9612065553665161, "rewards/margins": 0.4642188847064972, "rewards/rejected": 0.4969876706600189, "step": 946 }, { "epoch": 1.71, "learning_rate": 4.1018643225245007e-08, "logits/chosen": -2.3277804851531982, "logits/rejected": -2.332846164703369, "logps/chosen": -3.455763339996338, "logps/rejected": -3.079756021499634, "loss": 0.563, "rewards/accuracies": 1.0, "rewards/chosen": 0.7628587484359741, "rewards/margins": 0.3161526322364807, "rewards/rejected": 0.4467061161994934, "step": 947 }, { "epoch": 1.71, "learning_rate": 4.092262268335879e-08, "logits/chosen": -2.40966534614563, "logits/rejected": -2.3493459224700928, "logps/chosen": -27.73733901977539, "logps/rejected": -2.9828238487243652, "loss": 1.0687, "rewards/accuracies": 0.0, "rewards/chosen": -0.7984848022460938, "rewards/margins": -1.2729238271713257, "rewards/rejected": 0.4744390547275543, "step": 948 }, { "epoch": 1.72, "learning_rate": 4.0826636747193976e-08, "logits/chosen": -2.335606575012207, "logits/rejected": -2.328355312347412, "logps/chosen": -2.2828900814056396, "logps/rejected": -5.182745933532715, "loss": 0.5355, "rewards/accuracies": 1.0, "rewards/chosen": 0.7474666833877563, "rewards/margins": 0.3403441607952118, "rewards/rejected": 0.40712252259254456, "step": 949 }, { "epoch": 1.72, "learning_rate": 4.073068578267812e-08, "logits/chosen": -2.4273102283477783, "logits/rejected": -2.4253573417663574, "logps/chosen": -2.725874423980713, "logps/rejected": -1.6207046508789062, "loss": 0.6254, "rewards/accuracies": 1.0, "rewards/chosen": 0.5800719857215881, "rewards/margins": 0.017191529273986816, "rewards/rejected": 0.5628804564476013, "step": 950 }, { "epoch": 1.72, "learning_rate": 4.063477015560551e-08, "logits/chosen": -2.4285383224487305, "logits/rejected": -2.4351425170898438, "logps/chosen": -2.2640528678894043, "logps/rejected": -3.6189041137695312, "loss": 0.6281, "rewards/accuracies": 1.0, "rewards/chosen": 0.9218578338623047, "rewards/margins": 0.23053014278411865, "rewards/rejected": 0.691327691078186, "step": 951 }, { "epoch": 1.72, "learning_rate": 4.053889023163562e-08, "logits/chosen": -2.3293941020965576, "logits/rejected": -2.3243422508239746, "logps/chosen": -3.2186036109924316, "logps/rejected": -10.197040557861328, "loss": 0.8459, "rewards/accuracies": 0.0, "rewards/chosen": 0.8681861758232117, "rewards/margins": -0.4170028567314148, "rewards/rejected": 1.2851890325546265, "step": 952 }, { "epoch": 1.72, "learning_rate": 4.04430463762919e-08, "logits/chosen": -2.3414061069488525, "logits/rejected": -2.345008611679077, "logps/chosen": -2.441831588745117, "logps/rejected": -4.164527893066406, "loss": 0.4905, "rewards/accuracies": 1.0, "rewards/chosen": 0.7253730893135071, "rewards/margins": 0.3060832917690277, "rewards/rejected": 0.41928979754447937, "step": 953 }, { "epoch": 1.73, "learning_rate": 4.034723895496022e-08, "logits/chosen": -2.3332343101501465, "logits/rejected": -2.332632303237915, "logps/chosen": -1.1534790992736816, "logps/rejected": -5.356518745422363, "loss": 0.4762, "rewards/accuracies": 1.0, "rewards/chosen": 0.595440149307251, "rewards/margins": 0.19642749428749084, "rewards/rejected": 0.39901265501976013, "step": 954 }, { "epoch": 1.73, "learning_rate": 4.0251468332887625e-08, "logits/chosen": -2.190544605255127, "logits/rejected": -2.182492256164551, "logps/chosen": -1.380921483039856, "logps/rejected": -7.084914684295654, "loss": 0.7102, "rewards/accuracies": 0.0, "rewards/chosen": 0.6428808569908142, "rewards/margins": -0.17881107330322266, "rewards/rejected": 0.8216919302940369, "step": 955 }, { "epoch": 1.73, "learning_rate": 4.015573487518082e-08, "logits/chosen": -2.3723838329315186, "logits/rejected": -2.477466344833374, "logps/chosen": -2.384193181991577, "logps/rejected": -41.03216552734375, "loss": 0.6154, "rewards/accuracies": 0.0, "rewards/chosen": 0.5502312779426575, "rewards/margins": -0.07714605331420898, "rewards/rejected": 0.6273773312568665, "step": 956 }, { "epoch": 1.73, "learning_rate": 4.006003894680486e-08, "logits/chosen": -2.216862916946411, "logits/rejected": -2.281261682510376, "logps/chosen": -1.3541761636734009, "logps/rejected": -29.013280868530273, "loss": 0.403, "rewards/accuracies": 1.0, "rewards/chosen": 1.0049716234207153, "rewards/margins": 0.8554860353469849, "rewards/rejected": 0.14948558807373047, "step": 957 }, { "epoch": 1.73, "learning_rate": 3.9964380912581685e-08, "logits/chosen": -2.2682485580444336, "logits/rejected": -2.2682180404663086, "logps/chosen": -2.053269147872925, "logps/rejected": -5.006110191345215, "loss": 0.7101, "rewards/accuracies": 0.0, "rewards/chosen": 0.49493226408958435, "rewards/margins": -0.1306261122226715, "rewards/rejected": 0.6255583763122559, "step": 958 }, { "epoch": 1.73, "learning_rate": 3.98687611371888e-08, "logits/chosen": -2.433220624923706, "logits/rejected": -2.4449963569641113, "logps/chosen": -4.182671546936035, "logps/rejected": -4.683021545410156, "loss": 0.6406, "rewards/accuracies": 1.0, "rewards/chosen": 0.7282434701919556, "rewards/margins": 0.0037099123001098633, "rewards/rejected": 0.7245335578918457, "step": 959 }, { "epoch": 1.74, "learning_rate": 3.977317998515788e-08, "logits/chosen": -2.3026466369628906, "logits/rejected": -2.3070404529571533, "logps/chosen": -1.4208542108535767, "logps/rejected": -2.884209632873535, "loss": 0.7989, "rewards/accuracies": 0.0, "rewards/chosen": 0.8399645090103149, "rewards/margins": -0.07653331756591797, "rewards/rejected": 0.9164978265762329, "step": 960 }, { "epoch": 1.74, "learning_rate": 3.9677637820873314e-08, "logits/chosen": -2.3835320472717285, "logits/rejected": -2.3922946453094482, "logps/chosen": -2.51741361618042, "logps/rejected": -8.356119155883789, "loss": 0.5789, "rewards/accuracies": 1.0, "rewards/chosen": 0.8694978952407837, "rewards/margins": 0.47509148716926575, "rewards/rejected": 0.39440640807151794, "step": 961 }, { "epoch": 1.74, "learning_rate": 3.958213500857086e-08, "logits/chosen": -2.28444504737854, "logits/rejected": -2.2727713584899902, "logps/chosen": -3.9790477752685547, "logps/rejected": -5.623371124267578, "loss": 0.7071, "rewards/accuracies": 0.0, "rewards/chosen": 0.6095542311668396, "rewards/margins": -0.3691673278808594, "rewards/rejected": 0.978721559047699, "step": 962 }, { "epoch": 1.74, "learning_rate": 3.948667191233627e-08, "logits/chosen": -2.2784204483032227, "logits/rejected": -2.2804934978485107, "logps/chosen": -1.777904748916626, "logps/rejected": -9.339620590209961, "loss": 0.7008, "rewards/accuracies": 0.0, "rewards/chosen": 0.5131254196166992, "rewards/margins": -0.3735758066177368, "rewards/rejected": 0.886701226234436, "step": 963 }, { "epoch": 1.74, "learning_rate": 3.93912488961039e-08, "logits/chosen": -2.287712812423706, "logits/rejected": -2.3449900150299072, "logps/chosen": -6.999612808227539, "logps/rejected": -36.71642303466797, "loss": 0.621, "rewards/accuracies": 1.0, "rewards/chosen": 1.085767149925232, "rewards/margins": 0.34362465143203735, "rewards/rejected": 0.7421424984931946, "step": 964 }, { "epoch": 1.75, "learning_rate": 3.929586632365528e-08, "logits/chosen": -2.2185044288635254, "logits/rejected": -2.2284858226776123, "logps/chosen": -2.4594814777374268, "logps/rejected": -6.651813507080078, "loss": 0.7701, "rewards/accuracies": 0.0, "rewards/chosen": 0.6133900880813599, "rewards/margins": -0.42542850971221924, "rewards/rejected": 1.038818597793579, "step": 965 }, { "epoch": 1.75, "learning_rate": 3.9200524558617774e-08, "logits/chosen": -2.3669519424438477, "logits/rejected": -2.342496871948242, "logps/chosen": -0.8146802186965942, "logps/rejected": 0.0, "loss": 0.5882, "rewards/accuracies": 1.0, "rewards/chosen": 0.4759305417537689, "rewards/margins": 0.4759305417537689, "rewards/rejected": 0.0, "step": 966 }, { "epoch": 1.75, "learning_rate": 3.910522396446314e-08, "logits/chosen": -2.266530752182007, "logits/rejected": -2.2730727195739746, "logps/chosen": -2.803123712539673, "logps/rejected": -1.5470571517944336, "loss": 0.5748, "rewards/accuracies": 0.0, "rewards/chosen": 0.7208264470100403, "rewards/margins": -0.0569189190864563, "rewards/rejected": 0.7777453660964966, "step": 967 }, { "epoch": 1.75, "learning_rate": 3.9009964904506255e-08, "logits/chosen": -2.2334342002868652, "logits/rejected": -2.227698802947998, "logps/chosen": -2.6417288780212402, "logps/rejected": -9.069199562072754, "loss": 0.4914, "rewards/accuracies": 1.0, "rewards/chosen": 0.7201494574546814, "rewards/margins": 0.5145331621170044, "rewards/rejected": 0.2056162804365158, "step": 968 }, { "epoch": 1.75, "learning_rate": 3.891474774190359e-08, "logits/chosen": -2.4135851860046387, "logits/rejected": -2.4113991260528564, "logps/chosen": -3.2607452869415283, "logps/rejected": -2.528315305709839, "loss": 0.5896, "rewards/accuracies": 1.0, "rewards/chosen": 0.6694598197937012, "rewards/margins": 0.25028085708618164, "rewards/rejected": 0.41917896270751953, "step": 969 }, { "epoch": 1.75, "learning_rate": 3.881957283965192e-08, "logits/chosen": -2.351405382156372, "logits/rejected": -2.346010208129883, "logps/chosen": -2.7556471824645996, "logps/rejected": -9.957784652709961, "loss": 0.5904, "rewards/accuracies": 0.0, "rewards/chosen": 0.6381838321685791, "rewards/margins": -0.31893765926361084, "rewards/rejected": 0.9571214914321899, "step": 970 }, { "epoch": 1.76, "learning_rate": 3.872444056058686e-08, "logits/chosen": -2.231973648071289, "logits/rejected": -2.225332021713257, "logps/chosen": -2.4858994483947754, "logps/rejected": -2.9283931255340576, "loss": 0.6637, "rewards/accuracies": 1.0, "rewards/chosen": 0.5274451971054077, "rewards/margins": 0.1653565764427185, "rewards/rejected": 0.3620886206626892, "step": 971 }, { "epoch": 1.76, "learning_rate": 3.862935126738162e-08, "logits/chosen": -2.3494718074798584, "logits/rejected": -2.3252172470092773, "logps/chosen": -0.9908190965652466, "logps/rejected": 0.0, "loss": 0.398, "rewards/accuracies": 1.0, "rewards/chosen": 0.6677009463310242, "rewards/margins": 0.6677009463310242, "rewards/rejected": 0.0, "step": 972 }, { "epoch": 1.76, "learning_rate": 3.853430532254547e-08, "logits/chosen": -2.2849104404449463, "logits/rejected": -2.2899301052093506, "logps/chosen": -1.1748944520950317, "logps/rejected": -1.1477429866790771, "loss": 0.7413, "rewards/accuracies": 0.0, "rewards/chosen": 0.5735133290290833, "rewards/margins": -0.07793599367141724, "rewards/rejected": 0.6514493227005005, "step": 973 }, { "epoch": 1.76, "learning_rate": 3.843930308842244e-08, "logits/chosen": -2.3176021575927734, "logits/rejected": -2.2876927852630615, "logps/chosen": -1.868465542793274, "logps/rejected": 0.0, "loss": 0.5268, "rewards/accuracies": 1.0, "rewards/chosen": 0.7020944952964783, "rewards/margins": 0.7020944952964783, "rewards/rejected": 0.0, "step": 974 }, { "epoch": 1.76, "learning_rate": 3.834434492718994e-08, "logits/chosen": -2.3950953483581543, "logits/rejected": -2.395418167114258, "logps/chosen": -8.206099510192871, "logps/rejected": -5.740131855010986, "loss": 0.629, "rewards/accuracies": 1.0, "rewards/chosen": 0.8358756303787231, "rewards/margins": 0.17932528257369995, "rewards/rejected": 0.6565503478050232, "step": 975 }, { "epoch": 1.76, "learning_rate": 3.82494312008573e-08, "logits/chosen": -2.3390605449676514, "logits/rejected": -2.35855770111084, "logps/chosen": -1.5970325469970703, "logps/rejected": -24.35993766784668, "loss": 0.6489, "rewards/accuracies": 0.0, "rewards/chosen": 0.45447811484336853, "rewards/margins": -0.034059494733810425, "rewards/rejected": 0.48853760957717896, "step": 976 }, { "epoch": 1.77, "learning_rate": 3.815456227126454e-08, "logits/chosen": -2.362133502960205, "logits/rejected": -2.351667881011963, "logps/chosen": -5.032365798950195, "logps/rejected": -2.537936210632324, "loss": 0.6518, "rewards/accuracies": 1.0, "rewards/chosen": 0.7161678671836853, "rewards/margins": 0.14278644323349, "rewards/rejected": 0.5733814239501953, "step": 977 }, { "epoch": 1.77, "learning_rate": 3.805973850008084e-08, "logits/chosen": -2.4778480529785156, "logits/rejected": -2.5022568702697754, "logps/chosen": 0.0, "logps/rejected": -7.32996129989624, "loss": 1.0262, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": -0.9102949500083923, "rewards/rejected": 0.9102949500083923, "step": 978 }, { "epoch": 1.77, "learning_rate": 3.796496024880324e-08, "logits/chosen": -2.247556209564209, "logits/rejected": -2.2421493530273438, "logps/chosen": -4.390712738037109, "logps/rejected": -4.771873474121094, "loss": 0.5312, "rewards/accuracies": 1.0, "rewards/chosen": 1.026133418083191, "rewards/margins": 0.531385064125061, "rewards/rejected": 0.4947483241558075, "step": 979 }, { "epoch": 1.77, "learning_rate": 3.7870227878755215e-08, "logits/chosen": -2.1438674926757812, "logits/rejected": -2.1428885459899902, "logps/chosen": -0.8707922697067261, "logps/rejected": -2.2019670009613037, "loss": 0.6275, "rewards/accuracies": 1.0, "rewards/chosen": 0.5274275541305542, "rewards/margins": 0.09640577435493469, "rewards/rejected": 0.4310217797756195, "step": 980 }, { "epoch": 1.77, "learning_rate": 3.7775541751085386e-08, "logits/chosen": -2.3293464183807373, "logits/rejected": -2.3332159519195557, "logps/chosen": -5.023873805999756, "logps/rejected": -11.995623588562012, "loss": 0.5951, "rewards/accuracies": 1.0, "rewards/chosen": 0.5139468908309937, "rewards/margins": 0.3151422142982483, "rewards/rejected": 0.19880466163158417, "step": 981 }, { "epoch": 1.78, "learning_rate": 3.7680902226766046e-08, "logits/chosen": -2.419445514678955, "logits/rejected": -2.411177158355713, "logps/chosen": -7.722092628479004, "logps/rejected": -2.516274929046631, "loss": 0.5654, "rewards/accuracies": 1.0, "rewards/chosen": 0.8702689409255981, "rewards/margins": 0.28613728284835815, "rewards/rejected": 0.58413165807724, "step": 982 }, { "epoch": 1.78, "learning_rate": 3.758630966659182e-08, "logits/chosen": -2.248478412628174, "logits/rejected": -2.251642942428589, "logps/chosen": -6.711202621459961, "logps/rejected": -4.2524027824401855, "loss": 0.4032, "rewards/accuracies": 1.0, "rewards/chosen": 1.0176292657852173, "rewards/margins": 0.45308393239974976, "rewards/rejected": 0.5645453333854675, "step": 983 }, { "epoch": 1.78, "learning_rate": 3.7491764431178284e-08, "logits/chosen": -2.2965590953826904, "logits/rejected": -2.3058767318725586, "logps/chosen": -1.8681280612945557, "logps/rejected": -13.744305610656738, "loss": 0.4762, "rewards/accuracies": 1.0, "rewards/chosen": 1.006203055381775, "rewards/margins": 0.8621707558631897, "rewards/rejected": 0.144032284617424, "step": 984 }, { "epoch": 1.78, "learning_rate": 3.7397266880960666e-08, "logits/chosen": -2.2401297092437744, "logits/rejected": -2.2311198711395264, "logps/chosen": -2.323307991027832, "logps/rejected": -6.474714756011963, "loss": 0.7679, "rewards/accuracies": 0.0, "rewards/chosen": 0.737991988658905, "rewards/margins": -0.1917746663093567, "rewards/rejected": 0.9297666549682617, "step": 985 }, { "epoch": 1.78, "learning_rate": 3.73028173761923e-08, "logits/chosen": -2.3493590354919434, "logits/rejected": -2.357206106185913, "logps/chosen": -2.1925816535949707, "logps/rejected": -2.2391226291656494, "loss": 0.6446, "rewards/accuracies": 0.0, "rewards/chosen": 0.4116453230381012, "rewards/margins": -0.11098197102546692, "rewards/rejected": 0.5226272940635681, "step": 986 }, { "epoch": 1.78, "learning_rate": 3.720841627694341e-08, "logits/chosen": -2.128260374069214, "logits/rejected": -2.134669780731201, "logps/chosen": -2.441168785095215, "logps/rejected": -3.302757978439331, "loss": 0.6671, "rewards/accuracies": 1.0, "rewards/chosen": 0.43991634249687195, "rewards/margins": 0.019741356372833252, "rewards/rejected": 0.4201749861240387, "step": 987 }, { "epoch": 1.79, "learning_rate": 3.7114063943099706e-08, "logits/chosen": -2.2199387550354004, "logits/rejected": -2.2117502689361572, "logps/chosen": -3.7630531787872314, "logps/rejected": -12.055333137512207, "loss": 0.5372, "rewards/accuracies": 1.0, "rewards/chosen": 0.7763342261314392, "rewards/margins": 0.3332482576370239, "rewards/rejected": 0.4430859684944153, "step": 988 }, { "epoch": 1.79, "learning_rate": 3.701976073436093e-08, "logits/chosen": -2.3322930335998535, "logits/rejected": -2.335096597671509, "logps/chosen": -4.15881872177124, "logps/rejected": -9.328154563903809, "loss": 0.8869, "rewards/accuracies": 0.0, "rewards/chosen": 0.8514442443847656, "rewards/margins": -0.4530981779098511, "rewards/rejected": 1.3045424222946167, "step": 989 }, { "epoch": 1.79, "learning_rate": 3.69255070102396e-08, "logits/chosen": -2.3956260681152344, "logits/rejected": -2.409011125564575, "logps/chosen": -0.9414513111114502, "logps/rejected": -16.48558807373047, "loss": 0.6317, "rewards/accuracies": 1.0, "rewards/chosen": 0.43678340315818787, "rewards/margins": 0.31394824385643005, "rewards/rejected": 0.12283515930175781, "step": 990 }, { "epoch": 1.79, "learning_rate": 3.683130313005956e-08, "logits/chosen": -2.3503997325897217, "logits/rejected": -2.350050210952759, "logps/chosen": -10.269828796386719, "logps/rejected": -4.28183126449585, "loss": 0.498, "rewards/accuracies": 1.0, "rewards/chosen": 0.9411415457725525, "rewards/margins": 0.2678182125091553, "rewards/rejected": 0.6733233332633972, "step": 991 }, { "epoch": 1.79, "learning_rate": 3.6737149452954625e-08, "logits/chosen": -2.322758197784424, "logits/rejected": -2.339789390563965, "logps/chosen": -4.401342868804932, "logps/rejected": -10.391924858093262, "loss": 0.366, "rewards/accuracies": 1.0, "rewards/chosen": 0.648518979549408, "rewards/margins": 0.36627399921417236, "rewards/rejected": 0.2822449803352356, "step": 992 }, { "epoch": 1.8, "learning_rate": 3.664304633786722e-08, "logits/chosen": -2.271531820297241, "logits/rejected": -2.269024610519409, "logps/chosen": -3.2147574424743652, "logps/rejected": -3.247544288635254, "loss": 0.6338, "rewards/accuracies": 0.0, "rewards/chosen": 0.5157867074012756, "rewards/margins": -0.06872504949569702, "rewards/rejected": 0.5845117568969727, "step": 993 }, { "epoch": 1.8, "learning_rate": 3.654899414354706e-08, "logits/chosen": -2.288682460784912, "logits/rejected": -2.288578987121582, "logps/chosen": -3.6996803283691406, "logps/rejected": -3.388731002807617, "loss": 0.5691, "rewards/accuracies": 1.0, "rewards/chosen": 0.7913461923599243, "rewards/margins": 0.2684491276741028, "rewards/rejected": 0.5228970646858215, "step": 994 }, { "epoch": 1.8, "learning_rate": 3.6454993228549685e-08, "logits/chosen": -2.312687397003174, "logits/rejected": -2.311539888381958, "logps/chosen": -2.2155659198760986, "logps/rejected": -0.6211298108100891, "loss": 0.6028, "rewards/accuracies": 1.0, "rewards/chosen": 0.46295443177223206, "rewards/margins": 0.05392789840698242, "rewards/rejected": 0.40902653336524963, "step": 995 }, { "epoch": 1.8, "learning_rate": 3.6361043951235127e-08, "logits/chosen": -2.2638638019561768, "logits/rejected": -2.263345718383789, "logps/chosen": -2.451047420501709, "logps/rejected": -7.711325645446777, "loss": 0.6378, "rewards/accuracies": 0.0, "rewards/chosen": 0.8378721475601196, "rewards/margins": -0.0542445182800293, "rewards/rejected": 0.8921166658401489, "step": 996 }, { "epoch": 1.8, "learning_rate": 3.626714666976659e-08, "logits/chosen": -2.4138848781585693, "logits/rejected": -2.4432899951934814, "logps/chosen": 0.0, "logps/rejected": -2.278498411178589, "loss": 0.8915, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": -0.7573872804641724, "rewards/rejected": 0.7573872804641724, "step": 997 }, { "epoch": 1.8, "learning_rate": 3.617330174210909e-08, "logits/chosen": -2.4439077377319336, "logits/rejected": -2.416985034942627, "logps/chosen": -0.6412790417671204, "logps/rejected": 0.0, "loss": 0.5385, "rewards/accuracies": 1.0, "rewards/chosen": 0.5934980511665344, "rewards/margins": 0.5934980511665344, "rewards/rejected": 0.0, "step": 998 }, { "epoch": 1.81, "learning_rate": 3.607950952602798e-08, "logits/chosen": -2.4146950244903564, "logits/rejected": -2.421874523162842, "logps/chosen": -2.0067994594573975, "logps/rejected": -2.466146945953369, "loss": 0.6474, "rewards/accuracies": 0.0, "rewards/chosen": 0.6201195120811462, "rewards/margins": -0.15247738361358643, "rewards/rejected": 0.7725968956947327, "step": 999 }, { "epoch": 1.81, "learning_rate": 3.598577037908772e-08, "logits/chosen": -2.266814708709717, "logits/rejected": -2.4270389080047607, "logps/chosen": -2.060291051864624, "logps/rejected": -25.784875869750977, "loss": 0.5997, "rewards/accuracies": 1.0, "rewards/chosen": 0.6974186301231384, "rewards/margins": 0.3473871648311615, "rewards/rejected": 0.35003146529197693, "step": 1000 }, { "epoch": 1.81, "learning_rate": 3.5892084658650406e-08, "logits/chosen": -2.258295774459839, "logits/rejected": -2.2562594413757324, "logps/chosen": -1.6324949264526367, "logps/rejected": -2.7073280811309814, "loss": 0.4026, "rewards/accuracies": 1.0, "rewards/chosen": 0.7641558647155762, "rewards/margins": 0.34977835416793823, "rewards/rejected": 0.41437751054763794, "step": 1001 }, { "epoch": 1.81, "learning_rate": 3.5798452721874485e-08, "logits/chosen": -2.2039270401000977, "logits/rejected": -2.2642712593078613, "logps/chosen": -1.742652177810669, "logps/rejected": -31.04785919189453, "loss": 0.4549, "rewards/accuracies": 1.0, "rewards/chosen": 0.945850670337677, "rewards/margins": 0.8641452193260193, "rewards/rejected": 0.0817054733633995, "step": 1002 }, { "epoch": 1.81, "learning_rate": 3.570487492571338e-08, "logits/chosen": -2.359121322631836, "logits/rejected": -2.364837408065796, "logps/chosen": -3.778186082839966, "logps/rejected": -11.932511329650879, "loss": 0.5955, "rewards/accuracies": 1.0, "rewards/chosen": 0.9453337788581848, "rewards/margins": 0.5553075075149536, "rewards/rejected": 0.3900263011455536, "step": 1003 }, { "epoch": 1.82, "learning_rate": 3.561135162691408e-08, "logits/chosen": -2.339428186416626, "logits/rejected": -2.3413093090057373, "logps/chosen": -3.739253520965576, "logps/rejected": -3.2709436416625977, "loss": 0.7235, "rewards/accuracies": 1.0, "rewards/chosen": 0.6705957055091858, "rewards/margins": 0.18308836221694946, "rewards/rejected": 0.48750734329223633, "step": 1004 }, { "epoch": 1.82, "learning_rate": 3.551788318201582e-08, "logits/chosen": -2.225898027420044, "logits/rejected": -2.2302489280700684, "logps/chosen": -2.0537140369415283, "logps/rejected": -2.3339414596557617, "loss": 0.7317, "rewards/accuracies": 0.0, "rewards/chosen": 0.49358493089675903, "rewards/margins": -0.18647050857543945, "rewards/rejected": 0.6800554394721985, "step": 1005 }, { "epoch": 1.82, "learning_rate": 3.5424469947348714e-08, "logits/chosen": -2.341684103012085, "logits/rejected": -2.340648651123047, "logps/chosen": -1.992936372756958, "logps/rejected": -4.793848991394043, "loss": 0.7672, "rewards/accuracies": 0.0, "rewards/chosen": 0.511828601360321, "rewards/margins": -0.2184886932373047, "rewards/rejected": 0.7303172945976257, "step": 1006 }, { "epoch": 1.82, "learning_rate": 3.5331112279032436e-08, "logits/chosen": -2.2131223678588867, "logits/rejected": -2.210944652557373, "logps/chosen": -3.367733955383301, "logps/rejected": -1.2374234199523926, "loss": 0.7619, "rewards/accuracies": 0.0, "rewards/chosen": 0.6567689180374146, "rewards/margins": -0.022215604782104492, "rewards/rejected": 0.678984522819519, "step": 1007 }, { "epoch": 1.82, "learning_rate": 3.5237810532974776e-08, "logits/chosen": -2.2595977783203125, "logits/rejected": -2.3187313079833984, "logps/chosen": -3.4009175300598145, "logps/rejected": -10.362580299377441, "loss": 0.6291, "rewards/accuracies": 1.0, "rewards/chosen": 0.8419082760810852, "rewards/margins": 0.18499630689620972, "rewards/rejected": 0.6569119691848755, "step": 1008 }, { "epoch": 1.82, "learning_rate": 3.5144565064870354e-08, "logits/chosen": -2.3649747371673584, "logits/rejected": -2.3641488552093506, "logps/chosen": -3.073298454284668, "logps/rejected": -2.5090582370758057, "loss": 0.5376, "rewards/accuracies": 1.0, "rewards/chosen": 0.851948082447052, "rewards/margins": 0.4321323335170746, "rewards/rejected": 0.4198157489299774, "step": 1009 }, { "epoch": 1.83, "learning_rate": 3.505137623019922e-08, "logits/chosen": -2.3817474842071533, "logits/rejected": -2.3837618827819824, "logps/chosen": -2.5497100353240967, "logps/rejected": -2.3221044540405273, "loss": 0.6962, "rewards/accuracies": 1.0, "rewards/chosen": 0.8132883310317993, "rewards/margins": 0.08211755752563477, "rewards/rejected": 0.7311707735061646, "step": 1010 }, { "epoch": 1.83, "learning_rate": 3.495824438422555e-08, "logits/chosen": -2.3211865425109863, "logits/rejected": -2.314389944076538, "logps/chosen": -0.7194143533706665, "logps/rejected": -4.0521416664123535, "loss": 0.9199, "rewards/accuracies": 0.0, "rewards/chosen": 0.5422571897506714, "rewards/margins": -0.07612484693527222, "rewards/rejected": 0.6183820366859436, "step": 1011 }, { "epoch": 1.83, "learning_rate": 3.4865169881996256e-08, "logits/chosen": -2.377758026123047, "logits/rejected": -2.377910614013672, "logps/chosen": -11.614358901977539, "logps/rejected": -1.5414732694625854, "loss": 0.7936, "rewards/accuracies": 0.0, "rewards/chosen": 0.34563159942626953, "rewards/margins": -0.11143603920936584, "rewards/rejected": 0.4570676386356354, "step": 1012 }, { "epoch": 1.83, "learning_rate": 3.477215307833963e-08, "logits/chosen": -2.342588186264038, "logits/rejected": -2.317080497741699, "logps/chosen": -3.799947738647461, "logps/rejected": 0.0, "loss": 0.5207, "rewards/accuracies": 1.0, "rewards/chosen": 0.6916653513908386, "rewards/margins": 0.6916653513908386, "rewards/rejected": 0.0, "step": 1013 }, { "epoch": 1.83, "learning_rate": 3.467919432786398e-08, "logits/chosen": -2.280975103378296, "logits/rejected": -2.2834441661834717, "logps/chosen": -1.2919118404388428, "logps/rejected": -13.132427215576172, "loss": 0.6768, "rewards/accuracies": 0.0, "rewards/chosen": 0.509874165058136, "rewards/margins": -0.254646897315979, "rewards/rejected": 0.764521062374115, "step": 1014 }, { "epoch": 1.84, "learning_rate": 3.4586293984956315e-08, "logits/chosen": -2.2455508708953857, "logits/rejected": -2.2490694522857666, "logps/chosen": -0.9749374985694885, "logps/rejected": -6.181750297546387, "loss": 0.6615, "rewards/accuracies": 1.0, "rewards/chosen": 0.6540549397468567, "rewards/margins": 0.24360010027885437, "rewards/rejected": 0.4104548394680023, "step": 1015 }, { "epoch": 1.84, "learning_rate": 3.449345240378102e-08, "logits/chosen": -2.430922269821167, "logits/rejected": -2.4252283573150635, "logps/chosen": -1.5188350677490234, "logps/rejected": -7.337891101837158, "loss": 0.4554, "rewards/accuracies": 1.0, "rewards/chosen": 0.9811843037605286, "rewards/margins": 0.36605966091156006, "rewards/rejected": 0.6151246428489685, "step": 1016 }, { "epoch": 1.84, "learning_rate": 3.440066993827839e-08, "logits/chosen": -2.3062744140625, "logits/rejected": -2.307525157928467, "logps/chosen": -6.21551513671875, "logps/rejected": -0.8741588592529297, "loss": 0.5772, "rewards/accuracies": 1.0, "rewards/chosen": 0.7084261178970337, "rewards/margins": 0.10412144660949707, "rewards/rejected": 0.6043046712875366, "step": 1017 }, { "epoch": 1.84, "learning_rate": 3.430794694216339e-08, "logits/chosen": -2.3714756965637207, "logits/rejected": -2.371757745742798, "logps/chosen": -10.835549354553223, "logps/rejected": -2.7459120750427246, "loss": 0.6359, "rewards/accuracies": 1.0, "rewards/chosen": 0.973111629486084, "rewards/margins": 0.3559976816177368, "rewards/rejected": 0.6171139478683472, "step": 1018 }, { "epoch": 1.84, "learning_rate": 3.4215283768924254e-08, "logits/chosen": -2.3112740516662598, "logits/rejected": -2.303882360458374, "logps/chosen": -1.6743355989456177, "logps/rejected": -6.840859889984131, "loss": 0.788, "rewards/accuracies": 1.0, "rewards/chosen": 0.5770546793937683, "rewards/margins": 0.052866578102111816, "rewards/rejected": 0.5241881012916565, "step": 1019 }, { "epoch": 1.84, "learning_rate": 3.412268077182119e-08, "logits/chosen": -2.365795612335205, "logits/rejected": -2.363443613052368, "logps/chosen": -1.2230665683746338, "logps/rejected": -3.0986413955688477, "loss": 0.5471, "rewards/accuracies": 1.0, "rewards/chosen": 0.7482715845108032, "rewards/margins": 0.3170984387397766, "rewards/rejected": 0.4311731457710266, "step": 1020 }, { "epoch": 1.85, "learning_rate": 3.403013830388495e-08, "logits/chosen": -2.2793595790863037, "logits/rejected": -2.278336763381958, "logps/chosen": -2.7352452278137207, "logps/rejected": -2.925617218017578, "loss": 0.6742, "rewards/accuracies": 0.0, "rewards/chosen": 0.4310176372528076, "rewards/margins": -0.17022883892059326, "rewards/rejected": 0.6012464761734009, "step": 1021 }, { "epoch": 1.85, "learning_rate": 3.3937656717915565e-08, "logits/chosen": -2.3810300827026367, "logits/rejected": -2.4045746326446533, "logps/chosen": -6.986361980438232, "logps/rejected": -11.91641616821289, "loss": 0.5315, "rewards/accuracies": 1.0, "rewards/chosen": 0.9699727892875671, "rewards/margins": 0.045956969261169434, "rewards/rejected": 0.9240158200263977, "step": 1022 }, { "epoch": 1.85, "learning_rate": 3.3845236366480913e-08, "logits/chosen": -2.284175157546997, "logits/rejected": -2.2818989753723145, "logps/chosen": -9.24362564086914, "logps/rejected": -4.603921413421631, "loss": 0.6345, "rewards/accuracies": 1.0, "rewards/chosen": 0.5002743601799011, "rewards/margins": 0.10961899161338806, "rewards/rejected": 0.39065536856651306, "step": 1023 }, { "epoch": 1.85, "learning_rate": 3.375287760191551e-08, "logits/chosen": -2.3280022144317627, "logits/rejected": -2.3350210189819336, "logps/chosen": -1.5526639223098755, "logps/rejected": -9.74233341217041, "loss": 0.634, "rewards/accuracies": 1.0, "rewards/chosen": 0.5957322716712952, "rewards/margins": 0.2634955942630768, "rewards/rejected": 0.3322366774082184, "step": 1024 }, { "epoch": 1.85, "learning_rate": 3.366058077631903e-08, "logits/chosen": -2.3962595462799072, "logits/rejected": -2.474003314971924, "logps/chosen": -1.7888264656066895, "logps/rejected": -8.95966625213623, "loss": 0.7372, "rewards/accuracies": 1.0, "rewards/chosen": 0.7414096593856812, "rewards/margins": 0.18260294198989868, "rewards/rejected": 0.5588067173957825, "step": 1025 }, { "epoch": 1.86, "learning_rate": 3.3568346241555e-08, "logits/chosen": -2.327242851257324, "logits/rejected": -2.3320529460906982, "logps/chosen": -1.9627653360366821, "logps/rejected": -13.165291786193848, "loss": 0.619, "rewards/accuracies": 0.0, "rewards/chosen": 0.648084819316864, "rewards/margins": -0.18225795030593872, "rewards/rejected": 0.8303427696228027, "step": 1026 }, { "epoch": 1.86, "learning_rate": 3.3476174349249535e-08, "logits/chosen": -2.2318663597106934, "logits/rejected": -2.2357115745544434, "logps/chosen": -2.139415740966797, "logps/rejected": -10.474867820739746, "loss": 0.5311, "rewards/accuracies": 1.0, "rewards/chosen": 0.6750845313072205, "rewards/margins": 0.0644536018371582, "rewards/rejected": 0.6106309294700623, "step": 1027 }, { "epoch": 1.86, "learning_rate": 3.338406545078987e-08, "logits/chosen": -2.2544937133789062, "logits/rejected": -2.328240156173706, "logps/chosen": -2.187089443206787, "logps/rejected": -22.977760314941406, "loss": 0.5111, "rewards/accuracies": 1.0, "rewards/chosen": 0.6200596690177917, "rewards/margins": 0.13662880659103394, "rewards/rejected": 0.4834308624267578, "step": 1028 }, { "epoch": 1.86, "learning_rate": 3.3292019897323154e-08, "logits/chosen": -2.334625244140625, "logits/rejected": -2.3364028930664062, "logps/chosen": -11.110795974731445, "logps/rejected": -2.4773998260498047, "loss": 0.5463, "rewards/accuracies": 1.0, "rewards/chosen": 1.035728096961975, "rewards/margins": 0.3946264386177063, "rewards/rejected": 0.6411016583442688, "step": 1029 }, { "epoch": 1.86, "learning_rate": 3.3200038039754993e-08, "logits/chosen": -2.3290698528289795, "logits/rejected": -2.328298568725586, "logps/chosen": -2.4276015758514404, "logps/rejected": -7.547214984893799, "loss": 0.5226, "rewards/accuracies": 1.0, "rewards/chosen": 1.1248958110809326, "rewards/margins": 0.5233384966850281, "rewards/rejected": 0.6015573143959045, "step": 1030 }, { "epoch": 1.86, "learning_rate": 3.310812022874821e-08, "logits/chosen": -2.3470261096954346, "logits/rejected": -2.351762533187866, "logps/chosen": -1.494996190071106, "logps/rejected": -10.905461311340332, "loss": 0.7125, "rewards/accuracies": 0.0, "rewards/chosen": 0.6864625215530396, "rewards/margins": -0.20668011903762817, "rewards/rejected": 0.8931426405906677, "step": 1031 }, { "epoch": 1.87, "learning_rate": 3.30162668147214e-08, "logits/chosen": -2.320150375366211, "logits/rejected": -2.3073225021362305, "logps/chosen": -9.068619728088379, "logps/rejected": 0.0, "loss": 0.5373, "rewards/accuracies": 1.0, "rewards/chosen": 0.6693610548973083, "rewards/margins": 0.6693610548973083, "rewards/rejected": 0.0, "step": 1032 }, { "epoch": 1.87, "learning_rate": 3.2924478147847726e-08, "logits/chosen": -2.2870168685913086, "logits/rejected": -2.2814300060272217, "logps/chosen": -9.085677146911621, "logps/rejected": -2.897578716278076, "loss": 0.7326, "rewards/accuracies": 1.0, "rewards/chosen": 0.6517580151557922, "rewards/margins": 0.043197691440582275, "rewards/rejected": 0.60856032371521, "step": 1033 }, { "epoch": 1.87, "learning_rate": 3.283275457805349e-08, "logits/chosen": -2.35142183303833, "logits/rejected": -2.3510169982910156, "logps/chosen": -7.386965751647949, "logps/rejected": -5.436500549316406, "loss": 0.7049, "rewards/accuracies": 1.0, "rewards/chosen": 0.9626191258430481, "rewards/margins": 0.480267733335495, "rewards/rejected": 0.4823513925075531, "step": 1034 }, { "epoch": 1.87, "learning_rate": 3.274109645501681e-08, "logits/chosen": -2.4022598266601562, "logits/rejected": -2.4034695625305176, "logps/chosen": -2.651709794998169, "logps/rejected": -10.383636474609375, "loss": 0.7266, "rewards/accuracies": 0.0, "rewards/chosen": 0.8069190382957458, "rewards/margins": -0.17913293838500977, "rewards/rejected": 0.9860519766807556, "step": 1035 }, { "epoch": 1.87, "learning_rate": 3.264950412816628e-08, "logits/chosen": -2.271383047103882, "logits/rejected": -2.3158864974975586, "logps/chosen": -2.4457828998565674, "logps/rejected": -33.1237678527832, "loss": 0.5779, "rewards/accuracies": 1.0, "rewards/chosen": 0.7091249823570251, "rewards/margins": 0.14198821783065796, "rewards/rejected": 0.5671367645263672, "step": 1036 }, { "epoch": 1.88, "learning_rate": 3.255797794667974e-08, "logits/chosen": -2.333367347717285, "logits/rejected": -2.3238868713378906, "logps/chosen": -3.0585527420043945, "logps/rejected": -9.270912170410156, "loss": 0.7833, "rewards/accuracies": 0.0, "rewards/chosen": 0.8602003455162048, "rewards/margins": -0.3329208493232727, "rewards/rejected": 1.1931211948394775, "step": 1037 }, { "epoch": 1.88, "learning_rate": 3.24665182594828e-08, "logits/chosen": -2.380552291870117, "logits/rejected": -2.378567695617676, "logps/chosen": -2.0886590480804443, "logps/rejected": -5.575051307678223, "loss": 0.6028, "rewards/accuracies": 1.0, "rewards/chosen": 0.813229501247406, "rewards/margins": 0.2670130133628845, "rewards/rejected": 0.5462164878845215, "step": 1038 }, { "epoch": 1.88, "learning_rate": 3.237512541524756e-08, "logits/chosen": -2.257646083831787, "logits/rejected": -2.2496025562286377, "logps/chosen": -0.9969750046730042, "logps/rejected": -7.471565246582031, "loss": 0.5542, "rewards/accuracies": 1.0, "rewards/chosen": 0.8555940985679626, "rewards/margins": 0.6267755031585693, "rewards/rejected": 0.2288186103105545, "step": 1039 }, { "epoch": 1.88, "learning_rate": 3.228379976239133e-08, "logits/chosen": -2.2737913131713867, "logits/rejected": -2.2677273750305176, "logps/chosen": -13.867867469787598, "logps/rejected": -9.601747512817383, "loss": 0.6969, "rewards/accuracies": 0.0, "rewards/chosen": 0.8543969392776489, "rewards/margins": -0.29004883766174316, "rewards/rejected": 1.144445776939392, "step": 1040 }, { "epoch": 1.88, "learning_rate": 3.21925416490753e-08, "logits/chosen": -2.3845465183258057, "logits/rejected": -2.410742998123169, "logps/chosen": -1.98307204246521, "logps/rejected": -23.028779983520508, "loss": 0.5856, "rewards/accuracies": 0.0, "rewards/chosen": 0.5929562449455261, "rewards/margins": -0.18923693895339966, "rewards/rejected": 0.7821931838989258, "step": 1041 }, { "epoch": 1.88, "learning_rate": 3.2101351423203085e-08, "logits/chosen": -2.376176118850708, "logits/rejected": -2.3730974197387695, "logps/chosen": -4.897025108337402, "logps/rejected": -3.4103593826293945, "loss": 0.5538, "rewards/accuracies": 1.0, "rewards/chosen": 0.7851355671882629, "rewards/margins": 0.27572888135910034, "rewards/rejected": 0.5094066858291626, "step": 1042 }, { "epoch": 1.89, "learning_rate": 3.201022943241957e-08, "logits/chosen": -2.3148012161254883, "logits/rejected": -2.34987211227417, "logps/chosen": -1.6754518747329712, "logps/rejected": -28.58931541442871, "loss": 0.6447, "rewards/accuracies": 0.0, "rewards/chosen": 0.724250853061676, "rewards/margins": -0.17938095331192017, "rewards/rejected": 0.9036318063735962, "step": 1043 }, { "epoch": 1.89, "learning_rate": 3.191917602410947e-08, "logits/chosen": -2.3341548442840576, "logits/rejected": -2.336080551147461, "logps/chosen": -1.4319677352905273, "logps/rejected": -6.253621578216553, "loss": 0.5958, "rewards/accuracies": 1.0, "rewards/chosen": 0.7989039421081543, "rewards/margins": 0.4364655017852783, "rewards/rejected": 0.362438440322876, "step": 1044 }, { "epoch": 1.89, "learning_rate": 3.1828191545396035e-08, "logits/chosen": -2.442308187484741, "logits/rejected": -2.4420368671417236, "logps/chosen": -1.21335768699646, "logps/rejected": -5.302764892578125, "loss": 0.5977, "rewards/accuracies": 1.0, "rewards/chosen": 0.8601263165473938, "rewards/margins": 0.11671066284179688, "rewards/rejected": 0.7434156537055969, "step": 1045 }, { "epoch": 1.89, "learning_rate": 3.173727634313979e-08, "logits/chosen": -2.3394877910614014, "logits/rejected": -2.330605983734131, "logps/chosen": -3.7379939556121826, "logps/rejected": -6.653327941894531, "loss": 0.794, "rewards/accuracies": 0.0, "rewards/chosen": 0.7444462776184082, "rewards/margins": -0.28218352794647217, "rewards/rejected": 1.0266298055648804, "step": 1046 }, { "epoch": 1.89, "learning_rate": 3.1646430763937105e-08, "logits/chosen": -2.185701847076416, "logits/rejected": -2.2255334854125977, "logps/chosen": -7.059867858886719, "logps/rejected": -20.57637596130371, "loss": 0.3989, "rewards/accuracies": 1.0, "rewards/chosen": 0.9605283737182617, "rewards/margins": 0.6413571834564209, "rewards/rejected": 0.31917116045951843, "step": 1047 }, { "epoch": 1.9, "learning_rate": 3.155565515411893e-08, "logits/chosen": -2.3500664234161377, "logits/rejected": -2.3577702045440674, "logps/chosen": -2.551098585128784, "logps/rejected": -5.314914703369141, "loss": 0.5585, "rewards/accuracies": 1.0, "rewards/chosen": 0.8722144365310669, "rewards/margins": 0.2300850749015808, "rewards/rejected": 0.6421293616294861, "step": 1048 }, { "epoch": 1.9, "learning_rate": 3.146494985974947e-08, "logits/chosen": -2.2534308433532715, "logits/rejected": -2.258288621902466, "logps/chosen": -1.4414368867874146, "logps/rejected": -3.8187131881713867, "loss": 0.6047, "rewards/accuracies": 1.0, "rewards/chosen": 0.6674098968505859, "rewards/margins": 0.20198610424995422, "rewards/rejected": 0.4654237926006317, "step": 1049 }, { "epoch": 1.9, "learning_rate": 3.1374315226624934e-08, "logits/chosen": -2.3030619621276855, "logits/rejected": -2.3068249225616455, "logps/chosen": -1.4511346817016602, "logps/rejected": -1.250875473022461, "loss": 0.611, "rewards/accuracies": 0.0, "rewards/chosen": 0.6737287640571594, "rewards/margins": -0.024605393409729004, "rewards/rejected": 0.6983341574668884, "step": 1050 }, { "epoch": 1.9, "learning_rate": 3.1283751600272034e-08, "logits/chosen": -2.1844708919525146, "logits/rejected": -2.224425792694092, "logps/chosen": -0.9304203391075134, "logps/rejected": -6.068915843963623, "loss": 0.857, "rewards/accuracies": 0.0, "rewards/chosen": 0.4652576148509979, "rewards/margins": -0.3698326647281647, "rewards/rejected": 0.8350902795791626, "step": 1051 }, { "epoch": 1.9, "learning_rate": 3.119325932594687e-08, "logits/chosen": -2.2868292331695557, "logits/rejected": -2.2852225303649902, "logps/chosen": -2.132023334503174, "logps/rejected": -2.503601312637329, "loss": 0.7075, "rewards/accuracies": 0.0, "rewards/chosen": 0.5031201243400574, "rewards/margins": -0.04743605852127075, "rewards/rejected": 0.5505561828613281, "step": 1052 }, { "epoch": 1.9, "learning_rate": 3.110283874863347e-08, "logits/chosen": -2.311448335647583, "logits/rejected": -2.3044421672821045, "logps/chosen": -1.811093807220459, "logps/rejected": -3.469275712966919, "loss": 0.8749, "rewards/accuracies": 0.0, "rewards/chosen": 0.5073939561843872, "rewards/margins": -0.07994824647903442, "rewards/rejected": 0.5873422026634216, "step": 1053 }, { "epoch": 1.91, "learning_rate": 3.1012490213042594e-08, "logits/chosen": -2.421491861343384, "logits/rejected": -2.4161980152130127, "logps/chosen": -2.069371461868286, "logps/rejected": -3.7324020862579346, "loss": 0.587, "rewards/accuracies": 1.0, "rewards/chosen": 0.8623082041740417, "rewards/margins": 0.26671504974365234, "rewards/rejected": 0.5955931544303894, "step": 1054 }, { "epoch": 1.91, "learning_rate": 3.0922214063610294e-08, "logits/chosen": -2.3258960247039795, "logits/rejected": -2.326113700866699, "logps/chosen": -3.045764446258545, "logps/rejected": -13.44679069519043, "loss": 0.7265, "rewards/accuracies": 0.0, "rewards/chosen": 0.811627209186554, "rewards/margins": -0.009700953960418701, "rewards/rejected": 0.8213281631469727, "step": 1055 }, { "epoch": 1.91, "learning_rate": 3.0832010644496706e-08, "logits/chosen": -2.2642643451690674, "logits/rejected": -2.253317356109619, "logps/chosen": -3.2695510387420654, "logps/rejected": -3.0558125972747803, "loss": 0.734, "rewards/accuracies": 1.0, "rewards/chosen": 0.8183785676956177, "rewards/margins": 0.29072046279907227, "rewards/rejected": 0.5276581048965454, "step": 1056 }, { "epoch": 1.91, "learning_rate": 3.0741880299584674e-08, "logits/chosen": -2.3385050296783447, "logits/rejected": -2.3272452354431152, "logps/chosen": -6.292402267456055, "logps/rejected": -2.5705928802490234, "loss": 0.7194, "rewards/accuracies": 0.0, "rewards/chosen": 0.8349722027778625, "rewards/margins": -0.0006622076034545898, "rewards/rejected": 0.8356344103813171, "step": 1057 }, { "epoch": 1.91, "learning_rate": 3.065182337247846e-08, "logits/chosen": -2.3335065841674805, "logits/rejected": -2.330221176147461, "logps/chosen": -5.878866672515869, "logps/rejected": -6.810401439666748, "loss": 0.568, "rewards/accuracies": 1.0, "rewards/chosen": 1.1410311460494995, "rewards/margins": 0.4682670831680298, "rewards/rejected": 0.6727640628814697, "step": 1058 }, { "epoch": 1.92, "learning_rate": 3.0561840206502465e-08, "logits/chosen": -2.379283905029297, "logits/rejected": -2.4890496730804443, "logps/chosen": -2.0963892936706543, "logps/rejected": -39.46095275878906, "loss": 0.5462, "rewards/accuracies": 1.0, "rewards/chosen": 0.5816383957862854, "rewards/margins": 0.3085167407989502, "rewards/rejected": 0.2731216549873352, "step": 1059 }, { "epoch": 1.92, "learning_rate": 3.047193114469986e-08, "logits/chosen": -2.2718276977539062, "logits/rejected": -2.4117705821990967, "logps/chosen": -8.358196258544922, "logps/rejected": -33.185367584228516, "loss": 0.7564, "rewards/accuracies": 0.0, "rewards/chosen": 0.5806789398193359, "rewards/margins": -0.18315279483795166, "rewards/rejected": 0.7638317346572876, "step": 1060 }, { "epoch": 1.92, "learning_rate": 3.03820965298313e-08, "logits/chosen": -2.320434808731079, "logits/rejected": -2.328162908554077, "logps/chosen": -1.6313937902450562, "logps/rejected": -4.542113304138184, "loss": 0.6037, "rewards/accuracies": 1.0, "rewards/chosen": 0.5845749974250793, "rewards/margins": 0.24377533793449402, "rewards/rejected": 0.3407996594905853, "step": 1061 }, { "epoch": 1.92, "learning_rate": 3.0292336704373645e-08, "logits/chosen": -2.267878293991089, "logits/rejected": -2.2773728370666504, "logps/chosen": -1.279286503791809, "logps/rejected": -3.175644874572754, "loss": 0.6263, "rewards/accuracies": 1.0, "rewards/chosen": 0.5106782913208008, "rewards/margins": 0.11705654859542847, "rewards/rejected": 0.3936217427253723, "step": 1062 }, { "epoch": 1.92, "learning_rate": 3.020265201051866e-08, "logits/chosen": -2.1663498878479004, "logits/rejected": -2.1709303855895996, "logps/chosen": -1.6492023468017578, "logps/rejected": -2.589766502380371, "loss": 0.5741, "rewards/accuracies": 1.0, "rewards/chosen": 0.8054148554801941, "rewards/margins": 0.27908414602279663, "rewards/rejected": 0.5263307094573975, "step": 1063 }, { "epoch": 1.92, "learning_rate": 3.011304279017163e-08, "logits/chosen": -2.2225255966186523, "logits/rejected": -2.2954118251800537, "logps/chosen": -3.2037391662597656, "logps/rejected": -22.135135650634766, "loss": 0.5491, "rewards/accuracies": 1.0, "rewards/chosen": 0.7420811057090759, "rewards/margins": 0.271379679441452, "rewards/rejected": 0.4707014262676239, "step": 1064 }, { "epoch": 1.93, "learning_rate": 3.002350938495016e-08, "logits/chosen": -2.366434335708618, "logits/rejected": -2.37518048286438, "logps/chosen": -3.5265729427337646, "logps/rejected": -5.485661506652832, "loss": 0.45, "rewards/accuracies": 1.0, "rewards/chosen": 0.7278076410293579, "rewards/margins": 0.25273334980010986, "rewards/rejected": 0.47507429122924805, "step": 1065 }, { "epoch": 1.93, "learning_rate": 2.993405213618277e-08, "logits/chosen": -2.3298652172088623, "logits/rejected": -2.3770575523376465, "logps/chosen": -5.194465160369873, "logps/rejected": -23.512632369995117, "loss": 0.6752, "rewards/accuracies": 1.0, "rewards/chosen": 0.6354876160621643, "rewards/margins": 0.0534365177154541, "rewards/rejected": 0.5820510983467102, "step": 1066 }, { "epoch": 1.93, "learning_rate": 2.9844671384907724e-08, "logits/chosen": -2.3623576164245605, "logits/rejected": -2.2745673656463623, "logps/chosen": -11.796435356140137, "logps/rejected": -1.8135379552841187, "loss": 0.7646, "rewards/accuracies": 0.0, "rewards/chosen": 0.4944729804992676, "rewards/margins": -0.28180062770843506, "rewards/rejected": 0.7762736082077026, "step": 1067 }, { "epoch": 1.93, "learning_rate": 2.9755367471871608e-08, "logits/chosen": -2.261702537536621, "logits/rejected": -2.2587246894836426, "logps/chosen": -9.576741218566895, "logps/rejected": -2.3094751834869385, "loss": 0.6097, "rewards/accuracies": 0.0, "rewards/chosen": 0.3972737491130829, "rewards/margins": -0.13486185669898987, "rewards/rejected": 0.5321356058120728, "step": 1068 }, { "epoch": 1.93, "learning_rate": 2.966614073752807e-08, "logits/chosen": -2.330770492553711, "logits/rejected": -2.335761785507202, "logps/chosen": -0.8513873815536499, "logps/rejected": -3.9354982376098633, "loss": 0.5581, "rewards/accuracies": 1.0, "rewards/chosen": 0.8717226982116699, "rewards/margins": 0.4523322880268097, "rewards/rejected": 0.41939041018486023, "step": 1069 }, { "epoch": 1.93, "learning_rate": 2.957699152203652e-08, "logits/chosen": -2.2462751865386963, "logits/rejected": -2.226362705230713, "logps/chosen": -5.57936429977417, "logps/rejected": -2.5407583713531494, "loss": 0.5397, "rewards/accuracies": 1.0, "rewards/chosen": 1.060181975364685, "rewards/margins": 0.5476917624473572, "rewards/rejected": 0.5124902129173279, "step": 1070 }, { "epoch": 1.94, "learning_rate": 2.9487920165260883e-08, "logits/chosen": -2.297457218170166, "logits/rejected": -2.382901906967163, "logps/chosen": -1.621193289756775, "logps/rejected": -34.81886291503906, "loss": 0.62, "rewards/accuracies": 0.0, "rewards/chosen": 0.6292808055877686, "rewards/margins": -0.08670049905776978, "rewards/rejected": 0.7159813046455383, "step": 1071 }, { "epoch": 1.94, "learning_rate": 2.939892700676827e-08, "logits/chosen": -2.372945547103882, "logits/rejected": -2.3683114051818848, "logps/chosen": -1.2234188318252563, "logps/rejected": -1.8275339603424072, "loss": 0.6829, "rewards/accuracies": 0.0, "rewards/chosen": 0.5210482478141785, "rewards/margins": -0.13690215349197388, "rewards/rejected": 0.6579504013061523, "step": 1072 }, { "epoch": 1.94, "learning_rate": 2.931001238582762e-08, "logits/chosen": -2.394486904144287, "logits/rejected": -2.2571969032287598, "logps/chosen": -25.16316032409668, "logps/rejected": -2.3285484313964844, "loss": 0.5272, "rewards/accuracies": 1.0, "rewards/chosen": 0.9785257577896118, "rewards/margins": 0.5010443329811096, "rewards/rejected": 0.4774814248085022, "step": 1073 }, { "epoch": 1.94, "learning_rate": 2.9221176641408513e-08, "logits/chosen": -2.3622138500213623, "logits/rejected": -2.3311564922332764, "logps/chosen": -2.8225724697113037, "logps/rejected": 0.0, "loss": 0.4394, "rewards/accuracies": 1.0, "rewards/chosen": 0.8279424905776978, "rewards/margins": 0.8279424905776978, "rewards/rejected": 0.0, "step": 1074 }, { "epoch": 1.94, "learning_rate": 2.9132420112179767e-08, "logits/chosen": -2.327741861343384, "logits/rejected": -2.378441095352173, "logps/chosen": -0.7719986438751221, "logps/rejected": -12.794427871704102, "loss": 0.6948, "rewards/accuracies": 0.0, "rewards/chosen": 0.4750853478908539, "rewards/margins": -0.10096332430839539, "rewards/rejected": 0.5760486721992493, "step": 1075 }, { "epoch": 1.95, "learning_rate": 2.904374313650831e-08, "logits/chosen": -2.2307205200195312, "logits/rejected": -2.2086002826690674, "logps/chosen": -1.7610901594161987, "logps/rejected": 0.0, "loss": 0.4576, "rewards/accuracies": 1.0, "rewards/chosen": 0.6781358122825623, "rewards/margins": 0.6781358122825623, "rewards/rejected": 0.0, "step": 1076 }, { "epoch": 1.95, "learning_rate": 2.895514605245769e-08, "logits/chosen": -2.3910815715789795, "logits/rejected": -2.3974781036376953, "logps/chosen": -2.3959202766418457, "logps/rejected": -5.029928684234619, "loss": 0.6019, "rewards/accuracies": 1.0, "rewards/chosen": 0.7905105948448181, "rewards/margins": 0.40572232007980347, "rewards/rejected": 0.38478827476501465, "step": 1077 }, { "epoch": 1.95, "learning_rate": 2.8866629197786942e-08, "logits/chosen": -2.2534542083740234, "logits/rejected": -2.4129488468170166, "logps/chosen": -1.227590560913086, "logps/rejected": -32.093055725097656, "loss": 0.4285, "rewards/accuracies": 1.0, "rewards/chosen": 0.7339875102043152, "rewards/margins": 0.5574921369552612, "rewards/rejected": 0.17649535834789276, "step": 1078 }, { "epoch": 1.95, "learning_rate": 2.877819290994917e-08, "logits/chosen": -2.237910270690918, "logits/rejected": -2.3084778785705566, "logps/chosen": -2.1363441944122314, "logps/rejected": -29.865713119506836, "loss": 0.6168, "rewards/accuracies": 1.0, "rewards/chosen": 0.5643219947814941, "rewards/margins": 0.5973626375198364, "rewards/rejected": -0.033040620386600494, "step": 1079 }, { "epoch": 1.95, "learning_rate": 2.8689837526090455e-08, "logits/chosen": -2.278055191040039, "logits/rejected": -2.2786309719085693, "logps/chosen": -1.2239991426467896, "logps/rejected": -4.2461018562316895, "loss": 0.5729, "rewards/accuracies": 1.0, "rewards/chosen": 0.7561876177787781, "rewards/margins": 0.36872199177742004, "rewards/rejected": 0.38746562600135803, "step": 1080 }, { "epoch": 1.95, "learning_rate": 2.860156338304831e-08, "logits/chosen": -2.3953986167907715, "logits/rejected": -2.4153177738189697, "logps/chosen": -6.142863750457764, "logps/rejected": -12.22285270690918, "loss": 0.6318, "rewards/accuracies": 1.0, "rewards/chosen": 1.1802576780319214, "rewards/margins": 0.6233320832252502, "rewards/rejected": 0.5569255948066711, "step": 1081 }, { "epoch": 1.96, "learning_rate": 2.8513370817350657e-08, "logits/chosen": -2.322770595550537, "logits/rejected": -2.2970547676086426, "logps/chosen": -0.8583656549453735, "logps/rejected": 0.0, "loss": 0.5781, "rewards/accuracies": 1.0, "rewards/chosen": 0.5292627215385437, "rewards/margins": 0.5292627215385437, "rewards/rejected": 0.0, "step": 1082 }, { "epoch": 1.96, "learning_rate": 2.8425260165214327e-08, "logits/chosen": -2.3147084712982178, "logits/rejected": -2.4647867679595947, "logps/chosen": -1.6623682975769043, "logps/rejected": -25.897977828979492, "loss": 0.4182, "rewards/accuracies": 1.0, "rewards/chosen": 0.6506494879722595, "rewards/margins": 0.42349934577941895, "rewards/rejected": 0.22715015709400177, "step": 1083 }, { "epoch": 1.96, "learning_rate": 2.83372317625439e-08, "logits/chosen": -2.4074583053588867, "logits/rejected": -2.4035491943359375, "logps/chosen": -2.1681766510009766, "logps/rejected": -1.2576243877410889, "loss": 0.5758, "rewards/accuracies": 1.0, "rewards/chosen": 0.9390465617179871, "rewards/margins": 0.24026858806610107, "rewards/rejected": 0.698777973651886, "step": 1084 }, { "epoch": 1.96, "learning_rate": 2.8249285944930446e-08, "logits/chosen": -2.2560551166534424, "logits/rejected": -2.2494163513183594, "logps/chosen": -1.2315000295639038, "logps/rejected": -6.07533073425293, "loss": 0.6766, "rewards/accuracies": 1.0, "rewards/chosen": 0.6784783005714417, "rewards/margins": 0.06740546226501465, "rewards/rejected": 0.611072838306427, "step": 1085 }, { "epoch": 1.96, "learning_rate": 2.8161423047650122e-08, "logits/chosen": -2.275944471359253, "logits/rejected": -2.3026692867279053, "logps/chosen": -1.287851333618164, "logps/rejected": -25.12204360961914, "loss": 0.5875, "rewards/accuracies": 0.0, "rewards/chosen": 0.6057350635528564, "rewards/margins": -0.04300534725189209, "rewards/rejected": 0.6487404108047485, "step": 1086 }, { "epoch": 1.97, "learning_rate": 2.8073643405662984e-08, "logits/chosen": -2.273669481277466, "logits/rejected": -2.2742128372192383, "logps/chosen": -9.811046600341797, "logps/rejected": -4.9715375900268555, "loss": 0.616, "rewards/accuracies": 0.0, "rewards/chosen": 0.7550125122070312, "rewards/margins": -0.04773324728012085, "rewards/rejected": 0.8027457594871521, "step": 1087 }, { "epoch": 1.97, "learning_rate": 2.7985947353611727e-08, "logits/chosen": -2.353370189666748, "logits/rejected": -2.3057870864868164, "logps/chosen": -7.479187965393066, "logps/rejected": -2.943631887435913, "loss": 0.7945, "rewards/accuracies": 0.0, "rewards/chosen": 0.5520545840263367, "rewards/margins": -0.16279757022857666, "rewards/rejected": 0.7148521542549133, "step": 1088 }, { "epoch": 1.97, "learning_rate": 2.7898335225820374e-08, "logits/chosen": -2.2622897624969482, "logits/rejected": -2.251845359802246, "logps/chosen": -2.3649816513061523, "logps/rejected": -6.93300724029541, "loss": 0.5597, "rewards/accuracies": 0.0, "rewards/chosen": 0.5642164349555969, "rewards/margins": -0.2253926396369934, "rewards/rejected": 0.7896090745925903, "step": 1089 }, { "epoch": 1.97, "learning_rate": 2.781080735629297e-08, "logits/chosen": -2.3352415561676025, "logits/rejected": -2.396024227142334, "logps/chosen": -2.969111442565918, "logps/rejected": -14.8165283203125, "loss": 0.7091, "rewards/accuracies": 0.0, "rewards/chosen": 0.4367995262145996, "rewards/margins": -0.4611111879348755, "rewards/rejected": 0.8979107141494751, "step": 1090 }, { "epoch": 1.97, "learning_rate": 2.7723364078712353e-08, "logits/chosen": -2.2254812717437744, "logits/rejected": -2.2846038341522217, "logps/chosen": -1.6785249710083008, "logps/rejected": -22.143260955810547, "loss": 0.5296, "rewards/accuracies": 1.0, "rewards/chosen": 0.7754069566726685, "rewards/margins": 0.23061800003051758, "rewards/rejected": 0.5447889566421509, "step": 1091 }, { "epoch": 1.97, "learning_rate": 2.7636005726438866e-08, "logits/chosen": -2.336954116821289, "logits/rejected": -2.4480302333831787, "logps/chosen": -2.303957462310791, "logps/rejected": -23.493221282958984, "loss": 0.5333, "rewards/accuracies": 1.0, "rewards/chosen": 0.6264087557792664, "rewards/margins": 0.5314475893974304, "rewards/rejected": 0.09496116638183594, "step": 1092 }, { "epoch": 1.98, "learning_rate": 2.754873263250913e-08, "logits/chosen": -2.2886202335357666, "logits/rejected": -2.4532949924468994, "logps/chosen": -1.9944713115692139, "logps/rejected": -47.46363830566406, "loss": 0.5013, "rewards/accuracies": 1.0, "rewards/chosen": 0.7582135200500488, "rewards/margins": 0.13296937942504883, "rewards/rejected": 0.625244140625, "step": 1093 }, { "epoch": 1.98, "learning_rate": 2.7461545129634666e-08, "logits/chosen": -2.2852418422698975, "logits/rejected": -2.285271406173706, "logps/chosen": -2.315070152282715, "logps/rejected": -7.1590399742126465, "loss": 0.6552, "rewards/accuracies": 1.0, "rewards/chosen": 0.7061930894851685, "rewards/margins": 0.24292045831680298, "rewards/rejected": 0.4632726311683655, "step": 1094 }, { "epoch": 1.98, "learning_rate": 2.737444355020079e-08, "logits/chosen": -2.3625428676605225, "logits/rejected": -2.3624863624572754, "logps/chosen": -0.7470706701278687, "logps/rejected": -1.6499301195144653, "loss": 0.5925, "rewards/accuracies": 1.0, "rewards/chosen": 0.8554415106773376, "rewards/margins": 0.30383700132369995, "rewards/rejected": 0.5516045093536377, "step": 1095 }, { "epoch": 1.98, "learning_rate": 2.7287428226265154e-08, "logits/chosen": -2.373894691467285, "logits/rejected": -2.3719358444213867, "logps/chosen": -1.501394510269165, "logps/rejected": -1.3356298208236694, "loss": 0.7652, "rewards/accuracies": 1.0, "rewards/chosen": 0.7346205115318298, "rewards/margins": 0.27372464537620544, "rewards/rejected": 0.4608958661556244, "step": 1096 }, { "epoch": 1.98, "learning_rate": 2.720049948955662e-08, "logits/chosen": -2.3905746936798096, "logits/rejected": -2.360417127609253, "logps/chosen": -18.92977523803711, "logps/rejected": -17.789995193481445, "loss": 0.6227, "rewards/accuracies": 1.0, "rewards/chosen": 0.6856966018676758, "rewards/margins": 0.21618422865867615, "rewards/rejected": 0.46951237320899963, "step": 1097 }, { "epoch": 1.99, "learning_rate": 2.7113657671473987e-08, "logits/chosen": -2.370507001876831, "logits/rejected": -2.384106397628784, "logps/chosen": -1.951218605041504, "logps/rejected": -11.107354164123535, "loss": 0.5757, "rewards/accuracies": 1.0, "rewards/chosen": 0.6791624426841736, "rewards/margins": 0.20119431614875793, "rewards/rejected": 0.47796812653541565, "step": 1098 }, { "epoch": 1.99, "learning_rate": 2.7026903103084624e-08, "logits/chosen": -2.2181389331817627, "logits/rejected": -2.206641435623169, "logps/chosen": -8.664337158203125, "logps/rejected": -1.6409343481063843, "loss": 0.5796, "rewards/accuracies": 1.0, "rewards/chosen": 0.9160404205322266, "rewards/margins": 0.31099796295166016, "rewards/rejected": 0.6050424575805664, "step": 1099 }, { "epoch": 1.99, "learning_rate": 2.6940236115123356e-08, "logits/chosen": -2.2629008293151855, "logits/rejected": -2.298321485519409, "logps/chosen": -2.554605484008789, "logps/rejected": -24.986927032470703, "loss": 0.6926, "rewards/accuracies": 0.0, "rewards/chosen": 0.5353240370750427, "rewards/margins": -0.006639719009399414, "rewards/rejected": 0.5419637560844421, "step": 1100 }, { "epoch": 1.99, "learning_rate": 2.685365703799104e-08, "logits/chosen": -2.1974096298217773, "logits/rejected": -2.23537540435791, "logps/chosen": -1.719490647315979, "logps/rejected": -24.385099411010742, "loss": 0.5288, "rewards/accuracies": 1.0, "rewards/chosen": 0.5948628187179565, "rewards/margins": 0.2471729815006256, "rewards/rejected": 0.34768983721733093, "step": 1101 }, { "epoch": 1.99, "learning_rate": 2.6767166201753465e-08, "logits/chosen": -2.3376340866088867, "logits/rejected": -2.3416249752044678, "logps/chosen": -0.9803964495658875, "logps/rejected": -2.3718366622924805, "loss": 0.5916, "rewards/accuracies": 1.0, "rewards/chosen": 0.8101493716239929, "rewards/margins": 0.30572080612182617, "rewards/rejected": 0.5044285655021667, "step": 1102 }, { "epoch": 1.99, "learning_rate": 2.668076393613999e-08, "logits/chosen": -2.3578250408172607, "logits/rejected": -2.362457513809204, "logps/chosen": -0.42406409978866577, "logps/rejected": -1.680396556854248, "loss": 0.7973, "rewards/accuracies": 0.0, "rewards/chosen": 0.5149045586585999, "rewards/margins": -0.08870398998260498, "rewards/rejected": 0.6036085486412048, "step": 1103 }, { "epoch": 2.0, "learning_rate": 2.6594450570542293e-08, "logits/chosen": -2.2921199798583984, "logits/rejected": -2.2844831943511963, "logps/chosen": -3.0772504806518555, "logps/rejected": -10.729945182800293, "loss": 0.751, "rewards/accuracies": 0.0, "rewards/chosen": 0.6717762351036072, "rewards/margins": -0.439177930355072, "rewards/rejected": 1.1109541654586792, "step": 1104 }, { "epoch": 2.0, "learning_rate": 2.6508226434013155e-08, "logits/chosen": -2.3064115047454834, "logits/rejected": -2.3086183071136475, "logps/chosen": -14.278946876525879, "logps/rejected": -6.833683967590332, "loss": 0.7843, "rewards/accuracies": 1.0, "rewards/chosen": 0.8610650300979614, "rewards/margins": 0.16818839311599731, "rewards/rejected": 0.6928766369819641, "step": 1105 }, { "epoch": 2.0, "learning_rate": 2.6422091855265193e-08, "logits/chosen": -2.2989683151245117, "logits/rejected": -2.304018020629883, "logps/chosen": -1.653343677520752, "logps/rejected": -8.575416564941406, "loss": 0.6899, "rewards/accuracies": 0.0, "rewards/chosen": 0.8529664874076843, "rewards/margins": -0.24242478609085083, "rewards/rejected": 1.0953912734985352, "step": 1106 }, { "epoch": 2.0, "learning_rate": 2.6336047162669644e-08, "logits/chosen": -2.174819231033325, "logits/rejected": -2.1746699810028076, "logps/chosen": -1.9039556980133057, "logps/rejected": -8.513894081115723, "loss": 0.7953, "rewards/accuracies": 0.0, "rewards/chosen": 0.5697828531265259, "rewards/margins": -0.5357680320739746, "rewards/rejected": 1.1055508852005005, "step": 1107 }, { "epoch": 2.0, "learning_rate": 2.6250092684254992e-08, "logits/chosen": -2.3046517372131348, "logits/rejected": -2.3115501403808594, "logps/chosen": -2.5245487689971924, "logps/rejected": -4.199082374572754, "loss": 0.6266, "rewards/accuracies": 1.0, "rewards/chosen": 0.6333487629890442, "rewards/margins": 0.12152987718582153, "rewards/rejected": 0.5118188858032227, "step": 1108 }, { "epoch": 2.01, "learning_rate": 2.6164228747705846e-08, "logits/chosen": -2.1812758445739746, "logits/rejected": -2.3403217792510986, "logps/chosen": -1.664291501045227, "logps/rejected": -22.392635345458984, "loss": 0.5096, "rewards/accuracies": 1.0, "rewards/chosen": 0.9514042139053345, "rewards/margins": 0.7383232116699219, "rewards/rejected": 0.2130809873342514, "step": 1109 }, { "epoch": 2.01, "learning_rate": 2.60784556803616e-08, "logits/chosen": -2.4075324535369873, "logits/rejected": -2.40801739692688, "logps/chosen": -4.169190883636475, "logps/rejected": -5.314204216003418, "loss": 0.9796, "rewards/accuracies": 0.0, "rewards/chosen": 0.6885011196136475, "rewards/margins": -0.2789022922515869, "rewards/rejected": 0.9674034118652344, "step": 1110 }, { "epoch": 2.01, "learning_rate": 2.5992773809215305e-08, "logits/chosen": -2.283510446548462, "logits/rejected": -2.2876267433166504, "logps/chosen": -0.9805926084518433, "logps/rejected": -1.3155498504638672, "loss": 0.6162, "rewards/accuracies": 0.0, "rewards/chosen": 0.5929434895515442, "rewards/margins": -0.04172515869140625, "rewards/rejected": 0.6346686482429504, "step": 1111 }, { "epoch": 2.01, "learning_rate": 2.5907183460912264e-08, "logits/chosen": -2.2785418033599854, "logits/rejected": -2.2795777320861816, "logps/chosen": -1.9285728931427002, "logps/rejected": -3.679370641708374, "loss": 0.5989, "rewards/accuracies": 1.0, "rewards/chosen": 0.7009782195091248, "rewards/margins": 0.13860350847244263, "rewards/rejected": 0.5623747110366821, "step": 1112 }, { "epoch": 2.01, "learning_rate": 2.5821684961748925e-08, "logits/chosen": -2.4107584953308105, "logits/rejected": -2.3248395919799805, "logps/chosen": -12.941481590270996, "logps/rejected": -9.064661979675293, "loss": 0.6111, "rewards/accuracies": 1.0, "rewards/chosen": 0.8460536003112793, "rewards/margins": 0.36055105924606323, "rewards/rejected": 0.48550254106521606, "step": 1113 }, { "epoch": 2.01, "learning_rate": 2.573627863767152e-08, "logits/chosen": -2.4076030254364014, "logits/rejected": -2.4073448181152344, "logps/chosen": -1.7316381931304932, "logps/rejected": -3.52895450592041, "loss": 0.5395, "rewards/accuracies": 1.0, "rewards/chosen": 0.9694990515708923, "rewards/margins": 0.38223153352737427, "rewards/rejected": 0.5872675180435181, "step": 1114 }, { "epoch": 2.02, "learning_rate": 2.5650964814274955e-08, "logits/chosen": -2.1655077934265137, "logits/rejected": -2.169581174850464, "logps/chosen": -1.7074055671691895, "logps/rejected": -2.545988082885742, "loss": 0.4708, "rewards/accuracies": 1.0, "rewards/chosen": 0.7995945811271667, "rewards/margins": 0.2688860297203064, "rewards/rejected": 0.5307085514068604, "step": 1115 }, { "epoch": 2.02, "learning_rate": 2.5565743816801444e-08, "logits/chosen": -2.3047585487365723, "logits/rejected": -2.3130083084106445, "logps/chosen": -0.6724367737770081, "logps/rejected": -3.050185203552246, "loss": 0.6131, "rewards/accuracies": 1.0, "rewards/chosen": 0.7278987765312195, "rewards/margins": 0.24155327677726746, "rewards/rejected": 0.486345499753952, "step": 1116 }, { "epoch": 2.02, "learning_rate": 2.5480615970139318e-08, "logits/chosen": -2.2640621662139893, "logits/rejected": -2.297560453414917, "logps/chosen": -2.5566515922546387, "logps/rejected": -25.152843475341797, "loss": 0.6579, "rewards/accuracies": 1.0, "rewards/chosen": 0.5351194739341736, "rewards/margins": 0.009747326374053955, "rewards/rejected": 0.5253721475601196, "step": 1117 }, { "epoch": 2.02, "learning_rate": 2.5395581598821804e-08, "logits/chosen": -2.2438206672668457, "logits/rejected": -2.2493882179260254, "logps/chosen": -0.8771973252296448, "logps/rejected": -6.4615864753723145, "loss": 0.6933, "rewards/accuracies": 1.0, "rewards/chosen": 0.66382896900177, "rewards/margins": 0.2813577353954315, "rewards/rejected": 0.3824712336063385, "step": 1118 }, { "epoch": 2.02, "learning_rate": 2.5310641027025763e-08, "logits/chosen": -2.232783555984497, "logits/rejected": -2.2147200107574463, "logps/chosen": -5.816184997558594, "logps/rejected": 0.0, "loss": 0.5296, "rewards/accuracies": 1.0, "rewards/chosen": 0.9753339886665344, "rewards/margins": 0.9753339886665344, "rewards/rejected": 0.0, "step": 1119 }, { "epoch": 2.03, "learning_rate": 2.5225794578570514e-08, "logits/chosen": -2.3902173042297363, "logits/rejected": -2.39636492729187, "logps/chosen": -2.311490297317505, "logps/rejected": -5.022794723510742, "loss": 0.5913, "rewards/accuracies": 1.0, "rewards/chosen": 0.798953652381897, "rewards/margins": 0.4134519696235657, "rewards/rejected": 0.3855016827583313, "step": 1120 }, { "epoch": 2.03, "learning_rate": 2.5141042576916483e-08, "logits/chosen": -2.4069437980651855, "logits/rejected": -2.3804068565368652, "logps/chosen": -1.6749906539916992, "logps/rejected": 0.0, "loss": 0.4953, "rewards/accuracies": 1.0, "rewards/chosen": 0.5738980174064636, "rewards/margins": 0.5738980174064636, "rewards/rejected": 0.0, "step": 1121 }, { "epoch": 2.03, "learning_rate": 2.5056385345164043e-08, "logits/chosen": -2.2998130321502686, "logits/rejected": -2.2767465114593506, "logps/chosen": -3.1006150245666504, "logps/rejected": 0.0, "loss": 0.4854, "rewards/accuracies": 1.0, "rewards/chosen": 0.7705118060112, "rewards/margins": 0.7705118060112, "rewards/rejected": 0.0, "step": 1122 }, { "epoch": 2.03, "learning_rate": 2.497182320605233e-08, "logits/chosen": -2.252887487411499, "logits/rejected": -2.2524821758270264, "logps/chosen": -1.467616319656372, "logps/rejected": -5.460184574127197, "loss": 0.5928, "rewards/accuracies": 1.0, "rewards/chosen": 0.728640079498291, "rewards/margins": 0.3494594097137451, "rewards/rejected": 0.3791806697845459, "step": 1123 }, { "epoch": 2.03, "learning_rate": 2.4887356481957873e-08, "logits/chosen": -2.326826572418213, "logits/rejected": -2.3300111293792725, "logps/chosen": -1.5622726678848267, "logps/rejected": -10.950342178344727, "loss": 0.6067, "rewards/accuracies": 1.0, "rewards/chosen": 0.5412059426307678, "rewards/margins": 0.14714893698692322, "rewards/rejected": 0.3940570056438446, "step": 1124 }, { "epoch": 2.03, "learning_rate": 2.4802985494893554e-08, "logits/chosen": -2.293280601501465, "logits/rejected": -2.3129069805145264, "logps/chosen": -6.789262294769287, "logps/rejected": -24.11282730102539, "loss": 0.4864, "rewards/accuracies": 1.0, "rewards/chosen": 0.9406715631484985, "rewards/margins": 0.6465847492218018, "rewards/rejected": 0.29408684372901917, "step": 1125 }, { "epoch": 2.04, "learning_rate": 2.4718710566507184e-08, "logits/chosen": -2.2720181941986084, "logits/rejected": -2.2744767665863037, "logps/chosen": -1.597823143005371, "logps/rejected": -5.514286041259766, "loss": 0.5477, "rewards/accuracies": 1.0, "rewards/chosen": 0.6932870149612427, "rewards/margins": 0.5127463340759277, "rewards/rejected": 0.18054066598415375, "step": 1126 }, { "epoch": 2.04, "learning_rate": 2.46345320180804e-08, "logits/chosen": -2.256601095199585, "logits/rejected": -2.2392940521240234, "logps/chosen": -4.771538734436035, "logps/rejected": 0.0, "loss": 0.3872, "rewards/accuracies": 1.0, "rewards/chosen": 0.6247990727424622, "rewards/margins": 0.6247990727424622, "rewards/rejected": 0.0, "step": 1127 }, { "epoch": 2.04, "learning_rate": 2.4550450170527448e-08, "logits/chosen": -2.302605152130127, "logits/rejected": -2.2977495193481445, "logps/chosen": -1.1953762769699097, "logps/rejected": -4.909139633178711, "loss": 0.6523, "rewards/accuracies": 0.0, "rewards/chosen": 0.6110386848449707, "rewards/margins": -0.031091153621673584, "rewards/rejected": 0.6421298384666443, "step": 1128 }, { "epoch": 2.04, "learning_rate": 2.4466465344393867e-08, "logits/chosen": -2.3594319820404053, "logits/rejected": -2.333791971206665, "logps/chosen": -1.0413124561309814, "logps/rejected": 0.0, "loss": 0.5853, "rewards/accuracies": 1.0, "rewards/chosen": 0.6406151652336121, "rewards/margins": 0.6406151652336121, "rewards/rejected": 0.0, "step": 1129 }, { "epoch": 2.04, "learning_rate": 2.4382577859855324e-08, "logits/chosen": -2.2912437915802, "logits/rejected": -2.296419382095337, "logps/chosen": -0.9392217993736267, "logps/rejected": -2.8768362998962402, "loss": 0.6432, "rewards/accuracies": 1.0, "rewards/chosen": 0.7119077444076538, "rewards/margins": 0.0900992751121521, "rewards/rejected": 0.6218084692955017, "step": 1130 }, { "epoch": 2.05, "learning_rate": 2.4298788036716426e-08, "logits/chosen": -2.3446168899536133, "logits/rejected": -2.3136179447174072, "logps/chosen": -1.9816852807998657, "logps/rejected": 0.0, "loss": 0.7936, "rewards/accuracies": 1.0, "rewards/chosen": 0.7901071906089783, "rewards/margins": 0.7901071906089783, "rewards/rejected": 0.0, "step": 1131 }, { "epoch": 2.05, "learning_rate": 2.4215096194409466e-08, "logits/chosen": -2.1404120922088623, "logits/rejected": -2.139549493789673, "logps/chosen": -0.7435480356216431, "logps/rejected": -2.5752875804901123, "loss": 0.6261, "rewards/accuracies": 1.0, "rewards/chosen": 0.5401520133018494, "rewards/margins": 0.14646226167678833, "rewards/rejected": 0.39368975162506104, "step": 1132 }, { "epoch": 2.05, "learning_rate": 2.413150265199317e-08, "logits/chosen": -2.340578556060791, "logits/rejected": -2.268380641937256, "logps/chosen": -32.15281677246094, "logps/rejected": -2.9334683418273926, "loss": 0.4386, "rewards/accuracies": 1.0, "rewards/chosen": 0.5817409753799438, "rewards/margins": 0.16314074397087097, "rewards/rejected": 0.4186002314090729, "step": 1133 }, { "epoch": 2.05, "learning_rate": 2.404800772815152e-08, "logits/chosen": -2.3996715545654297, "logits/rejected": -2.396261692047119, "logps/chosen": -0.7439886331558228, "logps/rejected": -6.5988569259643555, "loss": 0.6535, "rewards/accuracies": 1.0, "rewards/chosen": 0.7071793675422668, "rewards/margins": 0.2173512578010559, "rewards/rejected": 0.48982810974121094, "step": 1134 }, { "epoch": 2.05, "learning_rate": 2.3964611741192536e-08, "logits/chosen": -2.3765532970428467, "logits/rejected": -2.368631362915039, "logps/chosen": -3.153902292251587, "logps/rejected": -13.56751823425293, "loss": 0.5533, "rewards/accuracies": 1.0, "rewards/chosen": 0.693700909614563, "rewards/margins": 0.3571700155735016, "rewards/rejected": 0.3365308940410614, "step": 1135 }, { "epoch": 2.05, "learning_rate": 2.3881315009047104e-08, "logits/chosen": -2.2091333866119385, "logits/rejected": -2.227874994277954, "logps/chosen": -9.115007400512695, "logps/rejected": -12.73409366607666, "loss": 0.5355, "rewards/accuracies": 0.0, "rewards/chosen": 0.2593112885951996, "rewards/margins": -0.07318258285522461, "rewards/rejected": 0.3324938714504242, "step": 1136 }, { "epoch": 2.06, "learning_rate": 2.3798117849267642e-08, "logits/chosen": -2.236825466156006, "logits/rejected": -2.2383670806884766, "logps/chosen": -2.547145366668701, "logps/rejected": -10.944210052490234, "loss": 0.746, "rewards/accuracies": 0.0, "rewards/chosen": 0.7214241623878479, "rewards/margins": -0.12502819299697876, "rewards/rejected": 0.8464523553848267, "step": 1137 }, { "epoch": 2.06, "learning_rate": 2.371502057902705e-08, "logits/chosen": -2.205101490020752, "logits/rejected": -2.231287956237793, "logps/chosen": -1.536407232284546, "logps/rejected": -11.011144638061523, "loss": 0.6594, "rewards/accuracies": 1.0, "rewards/chosen": 0.7847706079483032, "rewards/margins": 0.4961320161819458, "rewards/rejected": 0.2886385917663574, "step": 1138 }, { "epoch": 2.06, "learning_rate": 2.3632023515117366e-08, "logits/chosen": -2.452211380004883, "logits/rejected": -2.458228588104248, "logps/chosen": -3.1167140007019043, "logps/rejected": -0.9685599207878113, "loss": 0.5506, "rewards/accuracies": 0.0, "rewards/chosen": 0.5972749590873718, "rewards/margins": -0.11378294229507446, "rewards/rejected": 0.7110579013824463, "step": 1139 }, { "epoch": 2.06, "learning_rate": 2.35491269739486e-08, "logits/chosen": -2.347465991973877, "logits/rejected": -2.350034475326538, "logps/chosen": -1.3646589517593384, "logps/rejected": -10.898735046386719, "loss": 1.1292, "rewards/accuracies": 0.0, "rewards/chosen": 0.6994962096214294, "rewards/margins": -0.1943190097808838, "rewards/rejected": 0.8938152194023132, "step": 1140 }, { "epoch": 2.06, "learning_rate": 2.3466331271547605e-08, "logits/chosen": -2.2091939449310303, "logits/rejected": -2.2204642295837402, "logps/chosen": -1.806341528892517, "logps/rejected": -5.897456645965576, "loss": 0.8211, "rewards/accuracies": 0.0, "rewards/chosen": 0.6882382035255432, "rewards/margins": -0.2963370680809021, "rewards/rejected": 0.9845752716064453, "step": 1141 }, { "epoch": 2.07, "learning_rate": 2.338363672355675e-08, "logits/chosen": -2.2474052906036377, "logits/rejected": -2.2402732372283936, "logps/chosen": -4.214157581329346, "logps/rejected": -5.1011176109313965, "loss": 0.4906, "rewards/accuracies": 1.0, "rewards/chosen": 1.0437889099121094, "rewards/margins": 0.5819649696350098, "rewards/rejected": 0.4618239104747772, "step": 1142 }, { "epoch": 2.07, "learning_rate": 2.3301043645232753e-08, "logits/chosen": -2.326573133468628, "logits/rejected": -2.3306479454040527, "logps/chosen": -1.4452641010284424, "logps/rejected": -1.9112995862960815, "loss": 0.6424, "rewards/accuracies": 1.0, "rewards/chosen": 0.6482402682304382, "rewards/margins": 0.14553391933441162, "rewards/rejected": 0.5027063488960266, "step": 1143 }, { "epoch": 2.07, "learning_rate": 2.321855235144557e-08, "logits/chosen": -2.37748384475708, "logits/rejected": -2.386478900909424, "logps/chosen": -2.2896833419799805, "logps/rejected": -13.769756317138672, "loss": 0.5375, "rewards/accuracies": 1.0, "rewards/chosen": 0.9727222323417664, "rewards/margins": 0.5064859390258789, "rewards/rejected": 0.46623632311820984, "step": 1144 }, { "epoch": 2.07, "learning_rate": 2.3136163156677092e-08, "logits/chosen": -2.4777941703796387, "logits/rejected": -2.4765145778656006, "logps/chosen": -2.1232213973999023, "logps/rejected": -12.131479263305664, "loss": 0.3812, "rewards/accuracies": 1.0, "rewards/chosen": 1.0406674146652222, "rewards/margins": 0.8744640946388245, "rewards/rejected": 0.1662033051252365, "step": 1145 }, { "epoch": 2.07, "learning_rate": 2.305387637501996e-08, "logits/chosen": -2.4058446884155273, "logits/rejected": -2.3716578483581543, "logps/chosen": -12.227700233459473, "logps/rejected": 0.0, "loss": 0.4253, "rewards/accuracies": 1.0, "rewards/chosen": 0.9476014375686646, "rewards/margins": 0.9476014375686646, "rewards/rejected": 0.0, "step": 1146 }, { "epoch": 2.07, "learning_rate": 2.297169232017639e-08, "logits/chosen": -2.3443946838378906, "logits/rejected": -2.359469175338745, "logps/chosen": -1.4487802982330322, "logps/rejected": -4.657426834106445, "loss": 0.5289, "rewards/accuracies": 1.0, "rewards/chosen": 0.9029615521430969, "rewards/margins": 0.23612767457962036, "rewards/rejected": 0.6668338775634766, "step": 1147 }, { "epoch": 2.08, "learning_rate": 2.288961130545697e-08, "logits/chosen": -2.3390376567840576, "logits/rejected": -2.484537124633789, "logps/chosen": -1.3513448238372803, "logps/rejected": -41.36440658569336, "loss": 0.5377, "rewards/accuracies": 1.0, "rewards/chosen": 0.6572112441062927, "rewards/margins": 0.38148757815361023, "rewards/rejected": 0.2757236659526825, "step": 1148 }, { "epoch": 2.08, "learning_rate": 2.2807633643779484e-08, "logits/chosen": -2.3112449645996094, "logits/rejected": -2.3032565116882324, "logps/chosen": -1.8817238807678223, "logps/rejected": -3.5977091789245605, "loss": 0.6103, "rewards/accuracies": 0.0, "rewards/chosen": 0.500330924987793, "rewards/margins": -0.07416790723800659, "rewards/rejected": 0.5744988322257996, "step": 1149 }, { "epoch": 2.08, "learning_rate": 2.2725759647667726e-08, "logits/chosen": -2.325718879699707, "logits/rejected": -2.32851505279541, "logps/chosen": -2.5868730545043945, "logps/rejected": -2.435026168823242, "loss": 0.8934, "rewards/accuracies": 1.0, "rewards/chosen": 0.8348348736763, "rewards/margins": 0.35299283266067505, "rewards/rejected": 0.481842041015625, "step": 1150 }, { "epoch": 2.08, "learning_rate": 2.2643989629250226e-08, "logits/chosen": -2.399609327316284, "logits/rejected": -2.5429506301879883, "logps/chosen": -3.987553834915161, "logps/rejected": -31.542133331298828, "loss": 0.5797, "rewards/accuracies": 1.0, "rewards/chosen": 0.560936450958252, "rewards/margins": 0.3029114603996277, "rewards/rejected": 0.25802499055862427, "step": 1151 }, { "epoch": 2.08, "learning_rate": 2.2562323900259152e-08, "logits/chosen": -2.2623980045318604, "logits/rejected": -2.2519071102142334, "logps/chosen": -2.3603570461273193, "logps/rejected": -7.018831253051758, "loss": 0.7644, "rewards/accuracies": 0.0, "rewards/chosen": 0.5646788477897644, "rewards/margins": -0.2163478136062622, "rewards/rejected": 0.7810266613960266, "step": 1152 }, { "epoch": 2.08, "learning_rate": 2.248076277202907e-08, "logits/chosen": -2.3758544921875, "logits/rejected": -2.379880428314209, "logps/chosen": -7.144970893859863, "logps/rejected": -8.278273582458496, "loss": 0.568, "rewards/accuracies": 1.0, "rewards/chosen": 0.9951268434524536, "rewards/margins": 0.585074782371521, "rewards/rejected": 0.41005203127861023, "step": 1153 }, { "epoch": 2.09, "learning_rate": 2.2399306555495823e-08, "logits/chosen": -2.2525229454040527, "logits/rejected": -2.258053779602051, "logps/chosen": -1.5069385766983032, "logps/rejected": -3.8623199462890625, "loss": 0.5482, "rewards/accuracies": 1.0, "rewards/chosen": 0.6608597636222839, "rewards/margins": 0.1997966468334198, "rewards/rejected": 0.46106311678886414, "step": 1154 }, { "epoch": 2.09, "learning_rate": 2.2317955561195234e-08, "logits/chosen": -2.2854902744293213, "logits/rejected": -2.27409029006958, "logps/chosen": -4.771859169006348, "logps/rejected": -5.905869007110596, "loss": 0.8022, "rewards/accuracies": 0.0, "rewards/chosen": 0.4545338749885559, "rewards/margins": -0.22876298427581787, "rewards/rejected": 0.6832968592643738, "step": 1155 }, { "epoch": 2.09, "learning_rate": 2.2236710099262057e-08, "logits/chosen": -2.3936047554016113, "logits/rejected": -2.3939707279205322, "logps/chosen": -7.770627498626709, "logps/rejected": -5.816629886627197, "loss": 0.7059, "rewards/accuracies": 1.0, "rewards/chosen": 0.8794227838516235, "rewards/margins": 0.23052221536636353, "rewards/rejected": 0.64890056848526, "step": 1156 }, { "epoch": 2.09, "learning_rate": 2.2155570479428654e-08, "logits/chosen": -2.2243235111236572, "logits/rejected": -2.25778865814209, "logps/chosen": 0.0, "logps/rejected": -1.2679524421691895, "loss": 0.7602, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": -0.5482051372528076, "rewards/rejected": 0.5482051372528076, "step": 1157 }, { "epoch": 2.09, "learning_rate": 2.207453701102394e-08, "logits/chosen": -2.370739698410034, "logits/rejected": -2.370739698410034, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1158 }, { "epoch": 2.1, "learning_rate": 2.199361000297213e-08, "logits/chosen": -2.3602945804595947, "logits/rejected": -2.358919143676758, "logps/chosen": -8.465776443481445, "logps/rejected": -3.029196262359619, "loss": 0.6516, "rewards/accuracies": 0.0, "rewards/chosen": 0.7810360193252563, "rewards/margins": -0.09161108732223511, "rewards/rejected": 0.8726471066474915, "step": 1159 }, { "epoch": 2.1, "learning_rate": 2.191278976379156e-08, "logits/chosen": -2.3823928833007812, "logits/rejected": -2.375274181365967, "logps/chosen": -2.9523866176605225, "logps/rejected": -3.783269166946411, "loss": 0.5191, "rewards/accuracies": 1.0, "rewards/chosen": 1.1320170164108276, "rewards/margins": 0.39144694805145264, "rewards/rejected": 0.740570068359375, "step": 1160 }, { "epoch": 2.1, "learning_rate": 2.1832076601593535e-08, "logits/chosen": -2.301546096801758, "logits/rejected": -2.294675588607788, "logps/chosen": -6.741307735443115, "logps/rejected": -3.179898738861084, "loss": 0.4077, "rewards/accuracies": 1.0, "rewards/chosen": 0.7764597535133362, "rewards/margins": 0.23602628707885742, "rewards/rejected": 0.5404334664344788, "step": 1161 }, { "epoch": 2.1, "learning_rate": 2.175147082408118e-08, "logits/chosen": -2.3113956451416016, "logits/rejected": -2.4425361156463623, "logps/chosen": -1.7108924388885498, "logps/rejected": -23.301570892333984, "loss": 0.4245, "rewards/accuracies": 1.0, "rewards/chosen": 0.9414590001106262, "rewards/margins": 0.9872183799743652, "rewards/rejected": -0.04575939103960991, "step": 1162 }, { "epoch": 2.1, "learning_rate": 2.1670972738548237e-08, "logits/chosen": -2.242699384689331, "logits/rejected": -2.239258289337158, "logps/chosen": -4.878073692321777, "logps/rejected": -2.188694477081299, "loss": 0.5419, "rewards/accuracies": 0.0, "rewards/chosen": 0.5042033195495605, "rewards/margins": -0.18906313180923462, "rewards/rejected": 0.6932664513587952, "step": 1163 }, { "epoch": 2.1, "learning_rate": 2.159058265187786e-08, "logits/chosen": -2.325199842453003, "logits/rejected": -2.325491189956665, "logps/chosen": -2.900303840637207, "logps/rejected": -13.400592803955078, "loss": 0.5847, "rewards/accuracies": 1.0, "rewards/chosen": 0.8261733055114746, "rewards/margins": 0.00022536516189575195, "rewards/rejected": 0.8259479403495789, "step": 1164 }, { "epoch": 2.11, "learning_rate": 2.151030087054149e-08, "logits/chosen": -2.2798895835876465, "logits/rejected": -2.293182373046875, "logps/chosen": -1.4327013492584229, "logps/rejected": -12.264572143554688, "loss": 0.5781, "rewards/accuracies": 1.0, "rewards/chosen": 0.6458745002746582, "rewards/margins": 0.4298933744430542, "rewards/rejected": 0.2159811109304428, "step": 1165 }, { "epoch": 2.11, "learning_rate": 2.1430127700597668e-08, "logits/chosen": -2.2820234298706055, "logits/rejected": -2.3484156131744385, "logps/chosen": -1.7189141511917114, "logps/rejected": -26.073211669921875, "loss": 0.6663, "rewards/accuracies": 1.0, "rewards/chosen": 0.5573590993881226, "rewards/margins": 0.11058574914932251, "rewards/rejected": 0.44677335023880005, "step": 1166 }, { "epoch": 2.11, "learning_rate": 2.135006344769091e-08, "logits/chosen": -2.352618932723999, "logits/rejected": -2.3808600902557373, "logps/chosen": -2.1820902824401855, "logps/rejected": -15.73728084564209, "loss": 0.5497, "rewards/accuracies": 0.0, "rewards/chosen": 0.598838210105896, "rewards/margins": -0.0904608964920044, "rewards/rejected": 0.6892991065979004, "step": 1167 }, { "epoch": 2.11, "learning_rate": 2.1270108417050458e-08, "logits/chosen": -2.2795939445495605, "logits/rejected": -2.2857937812805176, "logps/chosen": -1.8482794761657715, "logps/rejected": -4.133164405822754, "loss": 0.6234, "rewards/accuracies": 1.0, "rewards/chosen": 1.0250400304794312, "rewards/margins": 0.3014286756515503, "rewards/rejected": 0.7236113548278809, "step": 1168 }, { "epoch": 2.11, "learning_rate": 2.119026291348922e-08, "logits/chosen": -2.367807388305664, "logits/rejected": -2.3694474697113037, "logps/chosen": -2.4258577823638916, "logps/rejected": -9.133828163146973, "loss": 0.7882, "rewards/accuracies": 0.0, "rewards/chosen": 0.551811695098877, "rewards/margins": -0.3217887878417969, "rewards/rejected": 0.8736004829406738, "step": 1169 }, { "epoch": 2.12, "learning_rate": 2.11105272414025e-08, "logits/chosen": -2.344949960708618, "logits/rejected": -2.3419182300567627, "logps/chosen": -2.8975436687469482, "logps/rejected": -5.20143985748291, "loss": 0.629, "rewards/accuracies": 1.0, "rewards/chosen": 0.8125214576721191, "rewards/margins": 0.2000104784965515, "rewards/rejected": 0.6125109791755676, "step": 1170 }, { "epoch": 2.12, "learning_rate": 2.103090170476694e-08, "logits/chosen": -2.2254550457000732, "logits/rejected": -2.223266124725342, "logps/chosen": -0.6106663942337036, "logps/rejected": -4.475576400756836, "loss": 0.6158, "rewards/accuracies": 1.0, "rewards/chosen": 0.5726802945137024, "rewards/margins": 0.16927766799926758, "rewards/rejected": 0.4034026265144348, "step": 1171 }, { "epoch": 2.12, "learning_rate": 2.095138660713928e-08, "logits/chosen": -2.2964985370635986, "logits/rejected": -2.352952480316162, "logps/chosen": -3.6590158939361572, "logps/rejected": -24.074371337890625, "loss": 0.5179, "rewards/accuracies": 1.0, "rewards/chosen": 0.9074055552482605, "rewards/margins": 0.3522412180900574, "rewards/rejected": 0.5551643371582031, "step": 1172 }, { "epoch": 2.12, "learning_rate": 2.0871982251655213e-08, "logits/chosen": -2.287705659866333, "logits/rejected": -2.4064948558807373, "logps/chosen": -1.5701875686645508, "logps/rejected": -48.109580993652344, "loss": 0.6775, "rewards/accuracies": 1.0, "rewards/chosen": 0.6572052240371704, "rewards/margins": 0.14151650667190552, "rewards/rejected": 0.5156887173652649, "step": 1173 }, { "epoch": 2.12, "learning_rate": 2.0792688941028302e-08, "logits/chosen": -2.489898681640625, "logits/rejected": -2.490739345550537, "logps/chosen": -2.1968719959259033, "logps/rejected": -12.376913070678711, "loss": 0.3886, "rewards/accuracies": 1.0, "rewards/chosen": 0.8083178400993347, "rewards/margins": 0.5583812594413757, "rewards/rejected": 0.24993658065795898, "step": 1174 }, { "epoch": 2.12, "learning_rate": 2.0713506977548767e-08, "logits/chosen": -2.2423136234283447, "logits/rejected": -2.2239181995391846, "logps/chosen": -12.112817764282227, "logps/rejected": -5.446467399597168, "loss": 0.4439, "rewards/accuracies": 1.0, "rewards/chosen": 0.7112447619438171, "rewards/margins": 0.36138084530830383, "rewards/rejected": 0.3498639166355133, "step": 1175 }, { "epoch": 2.13, "learning_rate": 2.0634436663082294e-08, "logits/chosen": -2.330120325088501, "logits/rejected": -2.324713706970215, "logps/chosen": -1.7656385898590088, "logps/rejected": -2.385331869125366, "loss": 0.7, "rewards/accuracies": 0.0, "rewards/chosen": 0.503816545009613, "rewards/margins": -0.25531888008117676, "rewards/rejected": 0.7591354250907898, "step": 1176 }, { "epoch": 2.13, "learning_rate": 2.055547829906896e-08, "logits/chosen": -2.402306318283081, "logits/rejected": -2.5228450298309326, "logps/chosen": -1.8962241411209106, "logps/rejected": -26.37246322631836, "loss": 0.3923, "rewards/accuracies": 1.0, "rewards/chosen": 0.6834157109260559, "rewards/margins": 0.8665631413459778, "rewards/rejected": -0.18314743041992188, "step": 1177 }, { "epoch": 2.13, "learning_rate": 2.047663218652206e-08, "logits/chosen": -2.287083148956299, "logits/rejected": -2.2689688205718994, "logps/chosen": -7.492793083190918, "logps/rejected": -2.0616931915283203, "loss": 0.5886, "rewards/accuracies": 1.0, "rewards/chosen": 1.048287272453308, "rewards/margins": 0.15048670768737793, "rewards/rejected": 0.8978005647659302, "step": 1178 }, { "epoch": 2.13, "learning_rate": 2.03978986260269e-08, "logits/chosen": -2.2292068004608154, "logits/rejected": -2.223792314529419, "logps/chosen": -2.5680899620056152, "logps/rejected": -2.7870919704437256, "loss": 0.646, "rewards/accuracies": 1.0, "rewards/chosen": 0.5192261338233948, "rewards/margins": 0.14300739765167236, "rewards/rejected": 0.3762187361717224, "step": 1179 }, { "epoch": 2.13, "learning_rate": 2.0319277917739774e-08, "logits/chosen": -2.360095977783203, "logits/rejected": -2.3349599838256836, "logps/chosen": -4.920892715454102, "logps/rejected": 0.0, "loss": 0.4821, "rewards/accuracies": 1.0, "rewards/chosen": 1.1671736240386963, "rewards/margins": 1.1671736240386963, "rewards/rejected": 0.0, "step": 1180 }, { "epoch": 2.14, "learning_rate": 2.0240770361386738e-08, "logits/chosen": -2.251645088195801, "logits/rejected": -2.3549695014953613, "logps/chosen": -2.735656976699829, "logps/rejected": -30.854774475097656, "loss": 0.5572, "rewards/accuracies": 1.0, "rewards/chosen": 0.6207780838012695, "rewards/margins": 0.5779060125350952, "rewards/rejected": 0.042872048914432526, "step": 1181 }, { "epoch": 2.14, "learning_rate": 2.0162376256262443e-08, "logits/chosen": -2.306879997253418, "logits/rejected": -2.3122880458831787, "logps/chosen": -2.165736436843872, "logps/rejected": -1.4750348329544067, "loss": 0.6762, "rewards/accuracies": 1.0, "rewards/chosen": 0.7585407495498657, "rewards/margins": 0.12224310636520386, "rewards/rejected": 0.6362976431846619, "step": 1182 }, { "epoch": 2.14, "learning_rate": 2.008409590122902e-08, "logits/chosen": -2.371055841445923, "logits/rejected": -2.4759938716888428, "logps/chosen": -2.4130618572235107, "logps/rejected": -41.20586013793945, "loss": 0.7172, "rewards/accuracies": 0.0, "rewards/chosen": 0.5473443865776062, "rewards/margins": -0.0626634955406189, "rewards/rejected": 0.6100078821182251, "step": 1183 }, { "epoch": 2.14, "learning_rate": 2.0005929594715015e-08, "logits/chosen": -2.3131167888641357, "logits/rejected": -2.4630823135375977, "logps/chosen": -1.5835589170455933, "logps/rejected": -26.129688262939453, "loss": 0.6864, "rewards/accuracies": 1.0, "rewards/chosen": 0.6585304141044617, "rewards/margins": 0.4545512795448303, "rewards/rejected": 0.20397911965847015, "step": 1184 }, { "epoch": 2.14, "learning_rate": 1.992787763471413e-08, "logits/chosen": -2.3412280082702637, "logits/rejected": -2.340857982635498, "logps/chosen": -2.091593027114868, "logps/rejected": -4.730628967285156, "loss": 0.7596, "rewards/accuracies": 0.0, "rewards/chosen": 0.5019629597663879, "rewards/margins": -0.23467636108398438, "rewards/rejected": 0.7366393208503723, "step": 1185 }, { "epoch": 2.14, "learning_rate": 1.9849940318784142e-08, "logits/chosen": -2.3773622512817383, "logits/rejected": -2.3784990310668945, "logps/chosen": -1.418758749961853, "logps/rejected": -2.4124882221221924, "loss": 0.6842, "rewards/accuracies": 0.0, "rewards/chosen": 0.5803930759429932, "rewards/margins": -0.0009101629257202148, "rewards/rejected": 0.5813032388687134, "step": 1186 }, { "epoch": 2.15, "learning_rate": 1.9772117944045812e-08, "logits/chosen": -2.3162176609039307, "logits/rejected": -2.3123998641967773, "logps/chosen": -1.7679262161254883, "logps/rejected": -4.725149154663086, "loss": 0.6752, "rewards/accuracies": 0.0, "rewards/chosen": 0.6767054796218872, "rewards/margins": -0.11378943920135498, "rewards/rejected": 0.7904949188232422, "step": 1187 }, { "epoch": 2.15, "learning_rate": 1.9694410807181694e-08, "logits/chosen": -2.3583316802978516, "logits/rejected": -2.3630993366241455, "logps/chosen": -0.4212506413459778, "logps/rejected": -1.626866102218628, "loss": 0.6577, "rewards/accuracies": 0.0, "rewards/chosen": 0.5151858925819397, "rewards/margins": -0.0937756896018982, "rewards/rejected": 0.6089615821838379, "step": 1188 }, { "epoch": 2.15, "learning_rate": 1.9616819204435003e-08, "logits/chosen": -2.353590250015259, "logits/rejected": -2.356466054916382, "logps/chosen": -2.574275255203247, "logps/rejected": -5.663507461547852, "loss": 0.5548, "rewards/accuracies": 1.0, "rewards/chosen": 0.9623011946678162, "rewards/margins": 0.49024996161460876, "rewards/rejected": 0.4720512330532074, "step": 1189 }, { "epoch": 2.15, "learning_rate": 1.9539343431608508e-08, "logits/chosen": -2.296410322189331, "logits/rejected": -2.382105827331543, "logps/chosen": -1.55852210521698, "logps/rejected": -34.7588005065918, "loss": 0.8258, "rewards/accuracies": 0.0, "rewards/chosen": 0.635547935962677, "rewards/margins": -0.08643960952758789, "rewards/rejected": 0.7219875454902649, "step": 1190 }, { "epoch": 2.15, "learning_rate": 1.946198378406339e-08, "logits/chosen": -2.244940757751465, "logits/rejected": -2.2547662258148193, "logps/chosen": -1.5173189640045166, "logps/rejected": -3.9108543395996094, "loss": 0.6424, "rewards/accuracies": 0.0, "rewards/chosen": 0.5272880792617798, "rewards/margins": -0.206567645072937, "rewards/rejected": 0.7338557243347168, "step": 1191 }, { "epoch": 2.16, "learning_rate": 1.9384740556718176e-08, "logits/chosen": -2.450026750564575, "logits/rejected": -2.44201922416687, "logps/chosen": -1.4899367094039917, "logps/rejected": -6.45443868637085, "loss": 0.5265, "rewards/accuracies": 1.0, "rewards/chosen": 0.787942111492157, "rewards/margins": 0.15792036056518555, "rewards/rejected": 0.6300217509269714, "step": 1192 }, { "epoch": 2.16, "learning_rate": 1.9307614044047483e-08, "logits/chosen": -2.238224983215332, "logits/rejected": -2.230307102203369, "logps/chosen": -2.1438751220703125, "logps/rejected": -6.44508171081543, "loss": 0.7453, "rewards/accuracies": 0.0, "rewards/chosen": 0.7559353113174438, "rewards/margins": -0.17679458856582642, "rewards/rejected": 0.9327298998832703, "step": 1193 }, { "epoch": 2.16, "learning_rate": 1.9230604540081063e-08, "logits/chosen": -2.3208327293395996, "logits/rejected": -2.314373254776001, "logps/chosen": -0.7003927826881409, "logps/rejected": -4.311269760131836, "loss": 0.5637, "rewards/accuracies": 0.0, "rewards/chosen": 0.5441593527793884, "rewards/margins": -0.04830986261367798, "rewards/rejected": 0.5924692153930664, "step": 1194 }, { "epoch": 2.16, "learning_rate": 1.9153712338402535e-08, "logits/chosen": -2.1845717430114746, "logits/rejected": -2.1814775466918945, "logps/chosen": -1.2101609706878662, "logps/rejected": -2.480159044265747, "loss": 0.5796, "rewards/accuracies": 0.0, "rewards/chosen": 0.44333791732788086, "rewards/margins": -0.18573743104934692, "rewards/rejected": 0.6290753483772278, "step": 1195 }, { "epoch": 2.16, "learning_rate": 1.9076937732148313e-08, "logits/chosen": -2.325235366821289, "logits/rejected": -2.2873544692993164, "logps/chosen": -11.97918701171875, "logps/rejected": -2.4710943698883057, "loss": 0.4801, "rewards/accuracies": 1.0, "rewards/chosen": 0.9349659085273743, "rewards/margins": 0.29940474033355713, "rewards/rejected": 0.6355611681938171, "step": 1196 }, { "epoch": 2.16, "learning_rate": 1.9000281014006585e-08, "logits/chosen": -2.184908151626587, "logits/rejected": -2.223098039627075, "logps/chosen": -0.9087822437286377, "logps/rejected": -6.059598922729492, "loss": 0.7223, "rewards/accuracies": 0.0, "rewards/chosen": 0.46742144227027893, "rewards/margins": -0.3686005771160126, "rewards/rejected": 0.8360220193862915, "step": 1197 }, { "epoch": 2.17, "learning_rate": 1.8923742476216013e-08, "logits/chosen": -2.2394495010375977, "logits/rejected": -2.243406295776367, "logps/chosen": -2.3871474266052246, "logps/rejected": -4.6699371337890625, "loss": 0.5486, "rewards/accuracies": 0.0, "rewards/chosen": 0.8402380347251892, "rewards/margins": -0.005861759185791016, "rewards/rejected": 0.8460997939109802, "step": 1198 }, { "epoch": 2.17, "learning_rate": 1.8847322410564814e-08, "logits/chosen": -2.18884539604187, "logits/rejected": -2.183074951171875, "logps/chosen": -4.080850601196289, "logps/rejected": -11.388764381408691, "loss": 0.5319, "rewards/accuracies": 1.0, "rewards/chosen": 0.6335256695747375, "rewards/margins": 0.4767414927482605, "rewards/rejected": 0.15678416192531586, "step": 1199 }, { "epoch": 2.17, "learning_rate": 1.8771021108389456e-08, "logits/chosen": -2.340559482574463, "logits/rejected": -2.313683032989502, "logps/chosen": -0.9468029141426086, "logps/rejected": 0.0, "loss": 0.5087, "rewards/accuracies": 1.0, "rewards/chosen": 0.6980526447296143, "rewards/margins": 0.6980526447296143, "rewards/rejected": 0.0, "step": 1200 }, { "epoch": 2.17, "learning_rate": 1.8694838860573743e-08, "logits/chosen": -2.381038188934326, "logits/rejected": -2.402003288269043, "logps/chosen": -3.145419120788574, "logps/rejected": -9.858158111572266, "loss": 0.6254, "rewards/accuracies": 1.0, "rewards/chosen": 0.7481895685195923, "rewards/margins": 0.2558405101299286, "rewards/rejected": 0.4923490583896637, "step": 1201 }, { "epoch": 2.17, "learning_rate": 1.8618775957547527e-08, "logits/chosen": -2.280674457550049, "logits/rejected": -2.2815818786621094, "logps/chosen": -2.8014588356018066, "logps/rejected": -3.7078065872192383, "loss": 0.6697, "rewards/accuracies": 1.0, "rewards/chosen": 0.9541776776313782, "rewards/margins": 0.3724824786186218, "rewards/rejected": 0.5816951990127563, "step": 1202 }, { "epoch": 2.18, "learning_rate": 1.8542832689285732e-08, "logits/chosen": -2.4248287677764893, "logits/rejected": -2.4278724193573, "logps/chosen": -1.4302775859832764, "logps/rejected": -3.7371065616607666, "loss": 0.4525, "rewards/accuracies": 1.0, "rewards/chosen": 0.7622265815734863, "rewards/margins": 0.2518380880355835, "rewards/rejected": 0.5103884935379028, "step": 1203 }, { "epoch": 2.18, "learning_rate": 1.846700934530715e-08, "logits/chosen": -2.433410167694092, "logits/rejected": -2.4304890632629395, "logps/chosen": -13.465323448181152, "logps/rejected": -8.35877513885498, "loss": 0.5643, "rewards/accuracies": 1.0, "rewards/chosen": 0.873062789440155, "rewards/margins": 0.29799604415893555, "rewards/rejected": 0.5750667452812195, "step": 1204 }, { "epoch": 2.18, "learning_rate": 1.839130621467343e-08, "logits/chosen": -2.2998807430267334, "logits/rejected": -2.331434726715088, "logps/chosen": -3.3424978256225586, "logps/rejected": -12.031938552856445, "loss": 0.9036, "rewards/accuracies": 0.0, "rewards/chosen": 0.48248109221458435, "rewards/margins": -0.427630752325058, "rewards/rejected": 0.9101118445396423, "step": 1205 }, { "epoch": 2.18, "learning_rate": 1.8315723585987945e-08, "logits/chosen": -2.2992374897003174, "logits/rejected": -2.3975229263305664, "logps/chosen": -1.9013019800186157, "logps/rejected": -21.801353454589844, "loss": 0.4419, "rewards/accuracies": 1.0, "rewards/chosen": 0.621078372001648, "rewards/margins": 0.34525763988494873, "rewards/rejected": 0.2758207321166992, "step": 1206 }, { "epoch": 2.18, "learning_rate": 1.8240261747394626e-08, "logits/chosen": -2.37296724319458, "logits/rejected": -2.3609838485717773, "logps/chosen": -1.7971088886260986, "logps/rejected": -9.791805267333984, "loss": 0.5812, "rewards/accuracies": 1.0, "rewards/chosen": 0.6202206015586853, "rewards/margins": 0.5168987512588501, "rewards/rejected": 0.10332184284925461, "step": 1207 }, { "epoch": 2.18, "learning_rate": 1.8164920986576932e-08, "logits/chosen": -2.353951930999756, "logits/rejected": -2.35203218460083, "logps/chosen": -1.7439239025115967, "logps/rejected": -1.2960518598556519, "loss": 0.7304, "rewards/accuracies": 0.0, "rewards/chosen": 0.3930536210536957, "rewards/margins": -0.06059238314628601, "rewards/rejected": 0.4536460041999817, "step": 1208 }, { "epoch": 2.19, "learning_rate": 1.808970159075674e-08, "logits/chosen": -2.237532615661621, "logits/rejected": -2.3072187900543213, "logps/chosen": -1.9058682918548584, "logps/rejected": -30.09837532043457, "loss": 0.5244, "rewards/accuracies": 1.0, "rewards/chosen": 0.5873695611953735, "rewards/margins": 0.6436764001846313, "rewards/rejected": -0.05630683898925781, "step": 1209 }, { "epoch": 2.19, "learning_rate": 1.8014603846693266e-08, "logits/chosen": -2.398038625717163, "logits/rejected": -2.3740899562835693, "logps/chosen": -11.926756858825684, "logps/rejected": 0.0, "loss": 0.4709, "rewards/accuracies": 1.0, "rewards/chosen": 0.7287489771842957, "rewards/margins": 0.7287489771842957, "rewards/rejected": 0.0, "step": 1210 }, { "epoch": 2.19, "learning_rate": 1.793962804068192e-08, "logits/chosen": -2.266484022140503, "logits/rejected": -2.2773125171661377, "logps/chosen": -1.2356188297271729, "logps/rejected": -3.1429104804992676, "loss": 0.5463, "rewards/accuracies": 1.0, "rewards/chosen": 0.5150451064109802, "rewards/margins": 0.11814993619918823, "rewards/rejected": 0.396895170211792, "step": 1211 }, { "epoch": 2.19, "learning_rate": 1.786477445855329e-08, "logits/chosen": -2.179133176803589, "logits/rejected": -2.1862826347351074, "logps/chosen": -1.2984519004821777, "logps/rejected": -7.792686939239502, "loss": 0.5583, "rewards/accuracies": 1.0, "rewards/chosen": 0.5119914412498474, "rewards/margins": 0.3991290032863617, "rewards/rejected": 0.11286244541406631, "step": 1212 }, { "epoch": 2.19, "learning_rate": 1.7790043385671944e-08, "logits/chosen": -2.2534897327423096, "logits/rejected": -2.2548327445983887, "logps/chosen": -1.9644936323165894, "logps/rejected": -3.4359569549560547, "loss": 0.7053, "rewards/accuracies": 1.0, "rewards/chosen": 0.804559051990509, "rewards/margins": 0.3814563453197479, "rewards/rejected": 0.4231027066707611, "step": 1213 }, { "epoch": 2.2, "learning_rate": 1.7715435106935487e-08, "logits/chosen": -2.223036527633667, "logits/rejected": -2.2942824363708496, "logps/chosen": -2.9911932945251465, "logps/rejected": -22.467201232910156, "loss": 0.6583, "rewards/accuracies": 1.0, "rewards/chosen": 0.763335645198822, "rewards/margins": 0.3258408010005951, "rewards/rejected": 0.43749484419822693, "step": 1214 }, { "epoch": 2.2, "learning_rate": 1.7640949906773333e-08, "logits/chosen": -2.2851033210754395, "logits/rejected": -2.2811243534088135, "logps/chosen": -3.239638566970825, "logps/rejected": -3.3339593410491943, "loss": 0.7126, "rewards/accuracies": 1.0, "rewards/chosen": 0.6868330240249634, "rewards/margins": 0.07113295793533325, "rewards/rejected": 0.6157000660896301, "step": 1215 }, { "epoch": 2.2, "learning_rate": 1.7566588069145717e-08, "logits/chosen": -2.3727617263793945, "logits/rejected": -2.3724422454833984, "logps/chosen": -1.3723547458648682, "logps/rejected": -1.3713984489440918, "loss": 0.5774, "rewards/accuracies": 1.0, "rewards/chosen": 0.7475244402885437, "rewards/margins": 0.2902054190635681, "rewards/rejected": 0.4573190212249756, "step": 1216 }, { "epoch": 2.2, "learning_rate": 1.749234987754253e-08, "logits/chosen": -2.392866849899292, "logits/rejected": -2.389598846435547, "logps/chosen": -8.177204132080078, "logps/rejected": -5.273365020751953, "loss": 0.4695, "rewards/accuracies": 1.0, "rewards/chosen": 0.9829301834106445, "rewards/margins": 0.5480100512504578, "rewards/rejected": 0.43492013216018677, "step": 1217 }, { "epoch": 2.2, "learning_rate": 1.7418235614982358e-08, "logits/chosen": -2.2639658451080322, "logits/rejected": -2.2595810890197754, "logps/chosen": -2.2285892963409424, "logps/rejected": -4.740245819091797, "loss": 0.5439, "rewards/accuracies": 1.0, "rewards/chosen": 0.7265265583992004, "rewards/margins": 0.26749125123023987, "rewards/rejected": 0.45903530716896057, "step": 1218 }, { "epoch": 2.2, "learning_rate": 1.73442455640113e-08, "logits/chosen": -2.2630412578582764, "logits/rejected": -2.292266845703125, "logps/chosen": -5.849012851715088, "logps/rejected": -13.504280090332031, "loss": 0.6592, "rewards/accuracies": 0.0, "rewards/chosen": 0.54874187707901, "rewards/margins": -0.35274624824523926, "rewards/rejected": 0.9014881253242493, "step": 1219 }, { "epoch": 2.21, "learning_rate": 1.7270380006701914e-08, "logits/chosen": -2.3025808334350586, "logits/rejected": -2.3067874908447266, "logps/chosen": -1.3592373132705688, "logps/rejected": -2.882920980453491, "loss": 0.7006, "rewards/accuracies": 0.0, "rewards/chosen": 0.8461261987686157, "rewards/margins": -0.0705004334449768, "rewards/rejected": 0.9166266322135925, "step": 1220 }, { "epoch": 2.21, "learning_rate": 1.719663922465215e-08, "logits/chosen": -2.203117847442627, "logits/rejected": -2.211768865585327, "logps/chosen": -1.8719969987869263, "logps/rejected": -4.998047351837158, "loss": 0.6766, "rewards/accuracies": 1.0, "rewards/chosen": 0.7689704298973083, "rewards/margins": 0.13009929656982422, "rewards/rejected": 0.6388711333274841, "step": 1221 }, { "epoch": 2.21, "learning_rate": 1.7123023498984262e-08, "logits/chosen": -2.4138894081115723, "logits/rejected": -2.416050910949707, "logps/chosen": -1.563769817352295, "logps/rejected": -2.875617504119873, "loss": 0.7015, "rewards/accuracies": 0.0, "rewards/chosen": 0.49467164278030396, "rewards/margins": -0.14421838521957397, "rewards/rejected": 0.6388900279998779, "step": 1222 }, { "epoch": 2.21, "learning_rate": 1.70495331103438e-08, "logits/chosen": -2.2718732357025146, "logits/rejected": -2.2705845832824707, "logps/chosen": -0.6578314304351807, "logps/rejected": -4.856772422790527, "loss": 0.6158, "rewards/accuracies": 1.0, "rewards/chosen": 0.4838705062866211, "rewards/margins": 0.356689453125, "rewards/rejected": 0.1271810531616211, "step": 1223 }, { "epoch": 2.21, "learning_rate": 1.697616833889847e-08, "logits/chosen": -2.3169422149658203, "logits/rejected": -2.3370509147644043, "logps/chosen": -3.4795026779174805, "logps/rejected": -25.127073287963867, "loss": 0.6976, "rewards/accuracies": 1.0, "rewards/chosen": 0.7248013615608215, "rewards/margins": 0.023965001106262207, "rewards/rejected": 0.7008363604545593, "step": 1224 }, { "epoch": 2.22, "learning_rate": 1.690292946433707e-08, "logits/chosen": -2.3075900077819824, "logits/rejected": -2.3754405975341797, "logps/chosen": -2.1656975746154785, "logps/rejected": -29.270578384399414, "loss": 0.5601, "rewards/accuracies": 1.0, "rewards/chosen": 0.6839872002601624, "rewards/margins": 0.2390422523021698, "rewards/rejected": 0.44494494795799255, "step": 1225 }, { "epoch": 2.22, "learning_rate": 1.6829816765868427e-08, "logits/chosen": -2.175971508026123, "logits/rejected": -2.1740615367889404, "logps/chosen": -3.9821834564208984, "logps/rejected": -6.353833198547363, "loss": 0.7188, "rewards/accuracies": 0.0, "rewards/chosen": 0.5432277917861938, "rewards/margins": -0.2573857307434082, "rewards/rejected": 0.800613522529602, "step": 1226 }, { "epoch": 2.22, "learning_rate": 1.675683052222041e-08, "logits/chosen": -2.2985494136810303, "logits/rejected": -2.303642511367798, "logps/chosen": -1.5844924449920654, "logps/rejected": -8.873428344726562, "loss": 0.7669, "rewards/accuracies": 0.0, "rewards/chosen": 0.8598515391349792, "rewards/margins": -0.2057386040687561, "rewards/rejected": 1.0655901432037354, "step": 1227 }, { "epoch": 2.22, "learning_rate": 1.668397101163875e-08, "logits/chosen": -2.195660352706909, "logits/rejected": -2.2016689777374268, "logps/chosen": -2.5689234733581543, "logps/rejected": -2.2506444454193115, "loss": 0.6224, "rewards/accuracies": 0.0, "rewards/chosen": 0.4674919545650482, "rewards/margins": -0.049680501222610474, "rewards/rejected": 0.5171724557876587, "step": 1228 }, { "epoch": 2.22, "learning_rate": 1.6611238511886017e-08, "logits/chosen": -2.412442207336426, "logits/rejected": -2.409449577331543, "logps/chosen": -3.1443350315093994, "logps/rejected": -2.623861789703369, "loss": 0.5923, "rewards/accuracies": 1.0, "rewards/chosen": 0.6811008453369141, "rewards/margins": 0.27147650718688965, "rewards/rejected": 0.4096243381500244, "step": 1229 }, { "epoch": 2.22, "learning_rate": 1.6538633300240634e-08, "logits/chosen": -2.353764057159424, "logits/rejected": -2.359640598297119, "logps/chosen": -1.4842896461486816, "logps/rejected": -2.5558574199676514, "loss": 0.5653, "rewards/accuracies": 1.0, "rewards/chosen": 0.5717670321464539, "rewards/margins": 0.013578951358795166, "rewards/rejected": 0.5581880807876587, "step": 1230 }, { "epoch": 2.23, "learning_rate": 1.646615565349575e-08, "logits/chosen": -2.2866151332855225, "logits/rejected": -2.2146575450897217, "logps/chosen": -29.073345184326172, "logps/rejected": -3.985030174255371, "loss": 0.5857, "rewards/accuracies": 1.0, "rewards/chosen": 1.1390122175216675, "rewards/margins": 0.5747837424278259, "rewards/rejected": 0.5642284750938416, "step": 1231 }, { "epoch": 2.23, "learning_rate": 1.6393805847958174e-08, "logits/chosen": -2.4275448322296143, "logits/rejected": -2.4227418899536133, "logps/chosen": -2.1951751708984375, "logps/rejected": -4.127463340759277, "loss": 0.6178, "rewards/accuracies": 1.0, "rewards/chosen": 1.0284008979797363, "rewards/margins": 0.43129175901412964, "rewards/rejected": 0.5971091389656067, "step": 1232 }, { "epoch": 2.23, "learning_rate": 1.6321584159447343e-08, "logits/chosen": -2.3563315868377686, "logits/rejected": -2.362989902496338, "logps/chosen": -1.3335639238357544, "logps/rejected": -4.166983127593994, "loss": 0.9412, "rewards/accuracies": 1.0, "rewards/chosen": 0.6043522953987122, "rewards/margins": 0.19870123267173767, "rewards/rejected": 0.4056510627269745, "step": 1233 }, { "epoch": 2.23, "learning_rate": 1.6249490863294302e-08, "logits/chosen": -2.309718370437622, "logits/rejected": -2.276590585708618, "logps/chosen": -1.2024412155151367, "logps/rejected": 0.0, "loss": 0.467, "rewards/accuracies": 1.0, "rewards/chosen": 0.7428150177001953, "rewards/margins": 0.7428150177001953, "rewards/rejected": 0.0, "step": 1234 }, { "epoch": 2.23, "learning_rate": 1.617752623434057e-08, "logits/chosen": -2.328895330429077, "logits/rejected": -2.5155279636383057, "logps/chosen": -1.6179395914077759, "logps/rejected": -50.84217834472656, "loss": 0.5876, "rewards/accuracies": 1.0, "rewards/chosen": 0.8849280476570129, "rewards/margins": 0.47896718978881836, "rewards/rejected": 0.4059608578681946, "step": 1235 }, { "epoch": 2.24, "learning_rate": 1.610569054693723e-08, "logits/chosen": -2.238558292388916, "logits/rejected": -2.2322940826416016, "logps/chosen": -1.9962561130523682, "logps/rejected": -9.421932220458984, "loss": 0.6675, "rewards/accuracies": 0.0, "rewards/chosen": 0.6139235496520996, "rewards/margins": -0.17974406480789185, "rewards/rejected": 0.7936676144599915, "step": 1236 }, { "epoch": 2.24, "learning_rate": 1.6033984074943746e-08, "logits/chosen": -2.361633062362671, "logits/rejected": -2.4085347652435303, "logps/chosen": -3.817159652709961, "logps/rejected": -26.850780487060547, "loss": 0.5204, "rewards/accuracies": 1.0, "rewards/chosen": 0.6441980600357056, "rewards/margins": 0.9659542441368103, "rewards/rejected": -0.32175618410110474, "step": 1237 }, { "epoch": 2.24, "learning_rate": 1.5962407091726986e-08, "logits/chosen": -2.3947675228118896, "logits/rejected": -2.3881869316101074, "logps/chosen": -9.742277145385742, "logps/rejected": -4.527799606323242, "loss": 0.6412, "rewards/accuracies": 1.0, "rewards/chosen": 0.7525821924209595, "rewards/margins": 0.2199193835258484, "rewards/rejected": 0.5326628088951111, "step": 1238 }, { "epoch": 2.24, "learning_rate": 1.5890959870160146e-08, "logits/chosen": -2.3141610622406006, "logits/rejected": -2.3237717151641846, "logps/chosen": -1.2192420959472656, "logps/rejected": -3.258162498474121, "loss": 0.6621, "rewards/accuracies": 1.0, "rewards/chosen": 0.5553576946258545, "rewards/margins": 0.1282932162284851, "rewards/rejected": 0.4270644783973694, "step": 1239 }, { "epoch": 2.24, "learning_rate": 1.5819642682621788e-08, "logits/chosen": -2.3794822692871094, "logits/rejected": -2.3749215602874756, "logps/chosen": -1.6080501079559326, "logps/rejected": -3.6496782302856445, "loss": 0.5598, "rewards/accuracies": 1.0, "rewards/chosen": 0.6394320726394653, "rewards/margins": 0.25848275423049927, "rewards/rejected": 0.38094931840896606, "step": 1240 }, { "epoch": 2.24, "learning_rate": 1.5748455800994674e-08, "logits/chosen": -2.3331615924835205, "logits/rejected": -2.308152437210083, "logps/chosen": -6.079657554626465, "logps/rejected": 0.0, "loss": 0.4552, "rewards/accuracies": 1.0, "rewards/chosen": 1.1939914226531982, "rewards/margins": 1.1939914226531982, "rewards/rejected": 0.0, "step": 1241 }, { "epoch": 2.25, "learning_rate": 1.5677399496664833e-08, "logits/chosen": -2.302344799041748, "logits/rejected": -2.3049752712249756, "logps/chosen": -1.5597782135009766, "logps/rejected": -1.2346993684768677, "loss": 0.7313, "rewards/accuracies": 0.0, "rewards/chosen": 0.6628643870353699, "rewards/margins": -0.03708738088607788, "rewards/rejected": 0.6999517679214478, "step": 1242 }, { "epoch": 2.25, "learning_rate": 1.560647404052051e-08, "logits/chosen": -2.2897815704345703, "logits/rejected": -2.272934675216675, "logps/chosen": -3.5889976024627686, "logps/rejected": -10.236196517944336, "loss": 0.5918, "rewards/accuracies": 1.0, "rewards/chosen": 1.0663880109786987, "rewards/margins": 0.7288881540298462, "rewards/rejected": 0.33749982714653015, "step": 1243 }, { "epoch": 2.25, "learning_rate": 1.5535679702951122e-08, "logits/chosen": -2.2705798149108887, "logits/rejected": -2.281543254852295, "logps/chosen": -2.067885398864746, "logps/rejected": -2.361919403076172, "loss": 0.5542, "rewards/accuracies": 1.0, "rewards/chosen": 0.6461667418479919, "rewards/margins": 0.2369457483291626, "rewards/rejected": 0.40922099351882935, "step": 1244 }, { "epoch": 2.25, "learning_rate": 1.5465016753846173e-08, "logits/chosen": -2.3788416385650635, "logits/rejected": -2.48814058303833, "logps/chosen": -2.0299906730651855, "logps/rejected": -39.6182861328125, "loss": 0.4804, "rewards/accuracies": 1.0, "rewards/chosen": 0.5882782340049744, "rewards/margins": 0.33088991045951843, "rewards/rejected": 0.25738832354545593, "step": 1245 }, { "epoch": 2.25, "learning_rate": 1.539448546259431e-08, "logits/chosen": -2.247368097305298, "logits/rejected": -2.253429412841797, "logps/chosen": -4.781901836395264, "logps/rejected": -2.5335421562194824, "loss": 0.5891, "rewards/accuracies": 1.0, "rewards/chosen": 0.6780169010162354, "rewards/margins": 0.1213347315788269, "rewards/rejected": 0.5566821694374084, "step": 1246 }, { "epoch": 2.25, "learning_rate": 1.5324086098082233e-08, "logits/chosen": -2.3122658729553223, "logits/rejected": -2.3103790283203125, "logps/chosen": -1.8599473237991333, "logps/rejected": -2.0840485095977783, "loss": 0.6696, "rewards/accuracies": 0.0, "rewards/chosen": 0.6156582236289978, "rewards/margins": -0.07429367303848267, "rewards/rejected": 0.6899518966674805, "step": 1247 }, { "epoch": 2.26, "learning_rate": 1.525381892869374e-08, "logits/chosen": -2.2847557067871094, "logits/rejected": -2.4285194873809814, "logps/chosen": -1.9296423196792603, "logps/rejected": -40.82734680175781, "loss": 0.3956, "rewards/accuracies": 1.0, "rewards/chosen": 0.6622020602226257, "rewards/margins": 0.8509472012519836, "rewards/rejected": -0.18874512612819672, "step": 1248 }, { "epoch": 2.26, "learning_rate": 1.518368422230865e-08, "logits/chosen": -2.281108856201172, "logits/rejected": -2.280097723007202, "logps/chosen": -1.0561085939407349, "logps/rejected": -3.4180681705474854, "loss": 0.8043, "rewards/accuracies": 1.0, "rewards/chosen": 0.6409944295883179, "rewards/margins": 0.14263302087783813, "rewards/rejected": 0.49836140871047974, "step": 1249 }, { "epoch": 2.26, "learning_rate": 1.511368224630177e-08, "logits/chosen": -2.2748517990112305, "logits/rejected": -2.2658355236053467, "logps/chosen": -1.6439900398254395, "logps/rejected": -5.954635143280029, "loss": 0.5524, "rewards/accuracies": 1.0, "rewards/chosen": 0.6362378597259521, "rewards/margins": 0.22753208875656128, "rewards/rejected": 0.40870577096939087, "step": 1250 }, { "epoch": 2.26, "learning_rate": 1.5043813267541907e-08, "logits/chosen": -2.333585739135742, "logits/rejected": -2.3392140865325928, "logps/chosen": -2.0221142768859863, "logps/rejected": -8.002583503723145, "loss": 0.7233, "rewards/accuracies": 0.0, "rewards/chosen": 0.7470458745956421, "rewards/margins": -0.3024369478225708, "rewards/rejected": 1.049482822418213, "step": 1251 }, { "epoch": 2.26, "learning_rate": 1.4974077552390823e-08, "logits/chosen": -2.371168851852417, "logits/rejected": -2.343960762023926, "logps/chosen": -1.4966610670089722, "logps/rejected": 0.0, "loss": 0.5612, "rewards/accuracies": 1.0, "rewards/chosen": 0.72199946641922, "rewards/margins": 0.72199946641922, "rewards/rejected": 0.0, "step": 1252 }, { "epoch": 2.27, "learning_rate": 1.4904475366702303e-08, "logits/chosen": -2.1663589477539062, "logits/rejected": -2.1680169105529785, "logps/chosen": -0.7366223335266113, "logps/rejected": -6.026187419891357, "loss": 0.6575, "rewards/accuracies": 0.0, "rewards/chosen": 0.5052886009216309, "rewards/margins": -0.21478134393692017, "rewards/rejected": 0.720069944858551, "step": 1253 }, { "epoch": 2.27, "learning_rate": 1.4835006975820997e-08, "logits/chosen": -2.3434274196624756, "logits/rejected": -2.3431692123413086, "logps/chosen": -1.7616227865219116, "logps/rejected": -11.72451400756836, "loss": 0.5549, "rewards/accuracies": 1.0, "rewards/chosen": 1.029295563697815, "rewards/margins": 0.8793857097625732, "rewards/rejected": 0.1499098837375641, "step": 1254 }, { "epoch": 2.27, "learning_rate": 1.4765672644581555e-08, "logits/chosen": -2.3148670196533203, "logits/rejected": -2.314990520477295, "logps/chosen": -4.12153434753418, "logps/rejected": -8.257246017456055, "loss": 0.7962, "rewards/accuracies": 0.0, "rewards/chosen": 0.7838255167007446, "rewards/margins": -0.3792785406112671, "rewards/rejected": 1.1631040573120117, "step": 1255 }, { "epoch": 2.27, "learning_rate": 1.46964726373075e-08, "logits/chosen": -2.3407483100891113, "logits/rejected": -2.337872266769409, "logps/chosen": -1.8627463579177856, "logps/rejected": -6.268837928771973, "loss": 0.7903, "rewards/accuracies": 0.0, "rewards/chosen": 0.6056330800056458, "rewards/margins": -0.2540966868400574, "rewards/rejected": 0.8597297668457031, "step": 1256 }, { "epoch": 2.27, "learning_rate": 1.4627407217810317e-08, "logits/chosen": -2.3251922130584717, "logits/rejected": -2.321958065032959, "logps/chosen": -1.353725790977478, "logps/rejected": -2.7080984115600586, "loss": 0.5833, "rewards/accuracies": 1.0, "rewards/chosen": 0.9094443321228027, "rewards/margins": 0.27786093950271606, "rewards/rejected": 0.6315833926200867, "step": 1257 }, { "epoch": 2.27, "learning_rate": 1.4558476649388357e-08, "logits/chosen": -2.423858642578125, "logits/rejected": -2.4195711612701416, "logps/chosen": -2.48921799659729, "logps/rejected": -3.251384735107422, "loss": 0.664, "rewards/accuracies": 1.0, "rewards/chosen": 1.0885884761810303, "rewards/margins": 0.5106518268585205, "rewards/rejected": 0.5779366493225098, "step": 1258 }, { "epoch": 2.28, "learning_rate": 1.448968119482591e-08, "logits/chosen": -2.327683210372925, "logits/rejected": -2.3276145458221436, "logps/chosen": -3.112006425857544, "logps/rejected": -3.0750784873962402, "loss": 0.6231, "rewards/accuracies": 1.0, "rewards/chosen": 1.0831562280654907, "rewards/margins": 0.3958272337913513, "rewards/rejected": 0.6873289942741394, "step": 1259 }, { "epoch": 2.28, "learning_rate": 1.4421021116392135e-08, "logits/chosen": -2.3110342025756836, "logits/rejected": -2.309281349182129, "logps/chosen": -4.61099910736084, "logps/rejected": -7.429590702056885, "loss": 0.9037, "rewards/accuracies": 0.0, "rewards/chosen": 0.5417420268058777, "rewards/margins": -0.5213622450828552, "rewards/rejected": 1.063104271888733, "step": 1260 }, { "epoch": 2.28, "learning_rate": 1.4352496675840142e-08, "logits/chosen": -2.2586498260498047, "logits/rejected": -2.2581710815429688, "logps/chosen": -2.119218587875366, "logps/rejected": -3.384652614593506, "loss": 0.5701, "rewards/accuracies": 1.0, "rewards/chosen": 0.802030086517334, "rewards/margins": 0.1294795274734497, "rewards/rejected": 0.6725505590438843, "step": 1261 }, { "epoch": 2.28, "learning_rate": 1.4284108134405936e-08, "logits/chosen": -2.354431390762329, "logits/rejected": -2.3043317794799805, "logps/chosen": -7.263949871063232, "logps/rejected": -2.85383939743042, "loss": 0.7743, "rewards/accuracies": 0.0, "rewards/chosen": 0.573578417301178, "rewards/margins": -0.15025299787521362, "rewards/rejected": 0.7238314151763916, "step": 1262 }, { "epoch": 2.28, "learning_rate": 1.4215855752807415e-08, "logits/chosen": -2.401447057723999, "logits/rejected": -2.4030730724334717, "logps/chosen": -1.4441285133361816, "logps/rejected": -5.826325416564941, "loss": 0.4666, "rewards/accuracies": 1.0, "rewards/chosen": 0.8518356680870056, "rewards/margins": 0.41604647040367126, "rewards/rejected": 0.43578919768333435, "step": 1263 }, { "epoch": 2.29, "learning_rate": 1.4147739791243397e-08, "logits/chosen": -2.26340389251709, "logits/rejected": -2.252218723297119, "logps/chosen": -3.167679786682129, "logps/rejected": -3.1704230308532715, "loss": 0.5486, "rewards/accuracies": 1.0, "rewards/chosen": 0.8285657167434692, "rewards/margins": 0.31236863136291504, "rewards/rejected": 0.5161970853805542, "step": 1264 }, { "epoch": 2.29, "learning_rate": 1.4079760509392618e-08, "logits/chosen": -2.235257625579834, "logits/rejected": -2.2438888549804688, "logps/chosen": -6.034542083740234, "logps/rejected": -2.166699171066284, "loss": 0.7025, "rewards/accuracies": 0.0, "rewards/chosen": 0.509996235370636, "rewards/margins": -0.05159348249435425, "rewards/rejected": 0.5615897178649902, "step": 1265 }, { "epoch": 2.29, "learning_rate": 1.4011918166412795e-08, "logits/chosen": -2.365880250930786, "logits/rejected": -2.364400863647461, "logps/chosen": -6.530887126922607, "logps/rejected": -4.674458980560303, "loss": 0.6092, "rewards/accuracies": 1.0, "rewards/chosen": 0.9470352530479431, "rewards/margins": 0.3696138858795166, "rewards/rejected": 0.5774213671684265, "step": 1266 }, { "epoch": 2.29, "learning_rate": 1.3944213020939527e-08, "logits/chosen": -2.2398736476898193, "logits/rejected": -2.244971752166748, "logps/chosen": -2.91452956199646, "logps/rejected": -3.7768187522888184, "loss": 0.6093, "rewards/accuracies": 0.0, "rewards/chosen": 0.5807204246520996, "rewards/margins": -0.18645721673965454, "rewards/rejected": 0.7671776413917542, "step": 1267 }, { "epoch": 2.29, "learning_rate": 1.3876645331085446e-08, "logits/chosen": -2.3988561630249023, "logits/rejected": -2.467090129852295, "logps/chosen": -4.390206336975098, "logps/rejected": -27.12212562561035, "loss": 0.5974, "rewards/accuracies": 1.0, "rewards/chosen": 0.7126799821853638, "rewards/margins": 0.7907652854919434, "rewards/rejected": -0.07808532565832138, "step": 1268 }, { "epoch": 2.29, "learning_rate": 1.380921535443908e-08, "logits/chosen": -2.2785823345184326, "logits/rejected": -2.364074945449829, "logps/chosen": -2.4519031047821045, "logps/rejected": -42.181095123291016, "loss": 0.4571, "rewards/accuracies": 1.0, "rewards/chosen": 0.9210575222969055, "rewards/margins": 1.2228362560272217, "rewards/rejected": -0.30177879333496094, "step": 1269 }, { "epoch": 2.3, "learning_rate": 1.3741923348064033e-08, "logits/chosen": -2.2681565284729004, "logits/rejected": -2.3358967304229736, "logps/chosen": -1.7479236125946045, "logps/rejected": -34.706581115722656, "loss": 0.7033, "rewards/accuracies": 1.0, "rewards/chosen": 0.7095141410827637, "rewards/margins": 0.1930815577507019, "rewards/rejected": 0.5164325833320618, "step": 1270 }, { "epoch": 2.3, "learning_rate": 1.3674769568497868e-08, "logits/chosen": -2.2665531635284424, "logits/rejected": -2.271148204803467, "logps/chosen": -7.120951175689697, "logps/rejected": -0.8631801605224609, "loss": 0.6587, "rewards/accuracies": 0.0, "rewards/chosen": 0.2891928255558014, "rewards/margins": -0.18842777609825134, "rewards/rejected": 0.47762060165405273, "step": 1271 }, { "epoch": 2.3, "learning_rate": 1.3607754271751198e-08, "logits/chosen": -2.42797589302063, "logits/rejected": -2.313427448272705, "logps/chosen": -22.86045265197754, "logps/rejected": -2.5419929027557373, "loss": 0.9323, "rewards/accuracies": 0.0, "rewards/chosen": 0.07670535892248154, "rewards/margins": -0.5166513919830322, "rewards/rejected": 0.593356728553772, "step": 1272 }, { "epoch": 2.3, "learning_rate": 1.3540877713306708e-08, "logits/chosen": -2.419132947921753, "logits/rejected": -2.3993618488311768, "logps/chosen": -8.114201545715332, "logps/rejected": 0.0, "loss": 0.426, "rewards/accuracies": 1.0, "rewards/chosen": 0.7267252206802368, "rewards/margins": 0.7267252206802368, "rewards/rejected": 0.0, "step": 1273 }, { "epoch": 2.3, "learning_rate": 1.3474140148118185e-08, "logits/chosen": -2.3675379753112793, "logits/rejected": -2.3642945289611816, "logps/chosen": -1.0199638605117798, "logps/rejected": -5.956565856933594, "loss": 0.7885, "rewards/accuracies": 0.0, "rewards/chosen": 0.5021294355392456, "rewards/margins": -0.20830684900283813, "rewards/rejected": 0.7104362845420837, "step": 1274 }, { "epoch": 2.31, "learning_rate": 1.3407541830609497e-08, "logits/chosen": -2.2157158851623535, "logits/rejected": -2.2810235023498535, "logps/chosen": -1.2677586078643799, "logps/rejected": -28.33307647705078, "loss": 0.6424, "rewards/accuracies": 1.0, "rewards/chosen": 1.0136134624481201, "rewards/margins": 0.7961074113845825, "rewards/rejected": 0.2175060361623764, "step": 1275 }, { "epoch": 2.31, "learning_rate": 1.3341083014673676e-08, "logits/chosen": -2.361659526824951, "logits/rejected": -2.3297672271728516, "logps/chosen": -2.788362503051758, "logps/rejected": 0.0, "loss": 0.4103, "rewards/accuracies": 1.0, "rewards/chosen": 0.8313634991645813, "rewards/margins": 0.8313634991645813, "rewards/rejected": 0.0, "step": 1276 }, { "epoch": 2.31, "learning_rate": 1.327476395367193e-08, "logits/chosen": -2.364311695098877, "logits/rejected": -2.3392982482910156, "logps/chosen": -0.8400543928146362, "logps/rejected": 0.0, "loss": 0.551, "rewards/accuracies": 1.0, "rewards/chosen": 0.47339311242103577, "rewards/margins": 0.47339311242103577, "rewards/rejected": 0.0, "step": 1277 }, { "epoch": 2.31, "learning_rate": 1.3208584900432651e-08, "logits/chosen": -2.3600871562957764, "logits/rejected": -2.3607373237609863, "logps/chosen": -0.9099445343017578, "logps/rejected": -5.269608497619629, "loss": 0.6923, "rewards/accuracies": 1.0, "rewards/chosen": 0.6984748244285583, "rewards/margins": 0.2679550349712372, "rewards/rejected": 0.43051978945732117, "step": 1278 }, { "epoch": 2.31, "learning_rate": 1.3142546107250536e-08, "logits/chosen": -2.1748507022857666, "logits/rejected": -2.176039695739746, "logps/chosen": -2.041137933731079, "logps/rejected": -4.64041805267334, "loss": 0.9223, "rewards/accuracies": 0.0, "rewards/chosen": 0.3981996476650238, "rewards/margins": -0.5224019289016724, "rewards/rejected": 0.9206015467643738, "step": 1279 }, { "epoch": 2.31, "learning_rate": 1.3076647825885539e-08, "logits/chosen": -2.245840072631836, "logits/rejected": -2.2255399227142334, "logps/chosen": -5.402414321899414, "logps/rejected": -2.4523797035217285, "loss": 0.5731, "rewards/accuracies": 1.0, "rewards/chosen": 1.0778770446777344, "rewards/margins": 0.5565489530563354, "rewards/rejected": 0.5213280916213989, "step": 1280 }, { "epoch": 2.32, "learning_rate": 1.3010890307561928e-08, "logits/chosen": -2.3705272674560547, "logits/rejected": -2.3423547744750977, "logps/chosen": -2.293820858001709, "logps/rejected": 0.0, "loss": 0.5415, "rewards/accuracies": 1.0, "rewards/chosen": 0.7059730887413025, "rewards/margins": 0.7059730887413025, "rewards/rejected": 0.0, "step": 1281 }, { "epoch": 2.32, "learning_rate": 1.2945273802967337e-08, "logits/chosen": -2.29008150100708, "logits/rejected": -2.3723838329315186, "logps/chosen": -1.4497607946395874, "logps/rejected": -23.988162994384766, "loss": 0.5718, "rewards/accuracies": 1.0, "rewards/chosen": 0.7456285357475281, "rewards/margins": 0.015154421329498291, "rewards/rejected": 0.7304741144180298, "step": 1282 }, { "epoch": 2.32, "learning_rate": 1.2879798562251853e-08, "logits/chosen": -2.280641794204712, "logits/rejected": -2.3653743267059326, "logps/chosen": -1.2605066299438477, "logps/rejected": -30.78913116455078, "loss": 0.6033, "rewards/accuracies": 1.0, "rewards/chosen": 0.6953789591789246, "rewards/margins": 0.3526405990123749, "rewards/rejected": 0.3427383601665497, "step": 1283 }, { "epoch": 2.32, "learning_rate": 1.2814464835026984e-08, "logits/chosen": -2.3145389556884766, "logits/rejected": -2.3209311962127686, "logps/chosen": -1.8259185552597046, "logps/rejected": -12.679887771606445, "loss": 0.5306, "rewards/accuracies": 1.0, "rewards/chosen": 0.7996413707733154, "rewards/margins": 0.44084447622299194, "rewards/rejected": 0.3587968945503235, "step": 1284 }, { "epoch": 2.32, "learning_rate": 1.2749272870364736e-08, "logits/chosen": -2.2532312870025635, "logits/rejected": -2.4131832122802734, "logps/chosen": -1.083545446395874, "logps/rejected": -32.30182647705078, "loss": 0.5002, "rewards/accuracies": 1.0, "rewards/chosen": 0.7483920454978943, "rewards/margins": 0.5927737355232239, "rewards/rejected": 0.15561829507350922, "step": 1285 }, { "epoch": 2.33, "learning_rate": 1.2684222916796711e-08, "logits/chosen": -2.2829039096832275, "logits/rejected": -2.271808385848999, "logps/chosen": -3.8489463329315186, "logps/rejected": -5.436720371246338, "loss": 0.8041, "rewards/accuracies": 0.0, "rewards/chosen": 0.6225643157958984, "rewards/margins": -0.3748222589492798, "rewards/rejected": 0.9973865747451782, "step": 1286 }, { "epoch": 2.33, "learning_rate": 1.2619315222313121e-08, "logits/chosen": -2.2434098720550537, "logits/rejected": -2.2467713356018066, "logps/chosen": -2.385277032852173, "logps/rejected": -9.492485046386719, "loss": 0.8386, "rewards/accuracies": 0.0, "rewards/chosen": 0.5682719349861145, "rewards/margins": -0.30046379566192627, "rewards/rejected": 0.8687357306480408, "step": 1287 }, { "epoch": 2.33, "learning_rate": 1.2554550034361805e-08, "logits/chosen": -2.356653928756714, "logits/rejected": -2.32828426361084, "logps/chosen": -1.963849663734436, "logps/rejected": 0.0, "loss": 0.5039, "rewards/accuracies": 1.0, "rewards/chosen": 0.9946174621582031, "rewards/margins": 0.9946174621582031, "rewards/rejected": 0.0, "step": 1288 }, { "epoch": 2.33, "learning_rate": 1.248992759984735e-08, "logits/chosen": -2.3530874252319336, "logits/rejected": -2.3482797145843506, "logps/chosen": -7.870249271392822, "logps/rejected": -2.61976957321167, "loss": 0.5596, "rewards/accuracies": 1.0, "rewards/chosen": 0.938933789730072, "rewards/margins": 0.43240076303482056, "rewards/rejected": 0.5065330266952515, "step": 1289 }, { "epoch": 2.33, "learning_rate": 1.242544816513012e-08, "logits/chosen": -2.3986616134643555, "logits/rejected": -2.4221644401550293, "logps/chosen": -1.3303651809692383, "logps/rejected": -4.1658549308776855, "loss": 0.7499, "rewards/accuracies": 0.0, "rewards/chosen": 0.6498503088951111, "rewards/margins": -0.07798236608505249, "rewards/rejected": 0.7278326749801636, "step": 1290 }, { "epoch": 2.33, "learning_rate": 1.2361111976025301e-08, "logits/chosen": -2.2806320190429688, "logits/rejected": -2.2960140705108643, "logps/chosen": -3.4750092029571533, "logps/rejected": -19.217761993408203, "loss": 0.5477, "rewards/accuracies": 1.0, "rewards/chosen": 0.7527520060539246, "rewards/margins": 0.3458714187145233, "rewards/rejected": 0.40688058733940125, "step": 1291 }, { "epoch": 2.34, "learning_rate": 1.2296919277802014e-08, "logits/chosen": -2.221287488937378, "logits/rejected": -2.2225425243377686, "logps/chosen": -1.951798439025879, "logps/rejected": -3.4797332286834717, "loss": 0.4796, "rewards/accuracies": 1.0, "rewards/chosen": 0.7783921360969543, "rewards/margins": 0.4100651443004608, "rewards/rejected": 0.36832699179649353, "step": 1292 }, { "epoch": 2.34, "learning_rate": 1.2232870315182354e-08, "logits/chosen": -2.3450682163238525, "logits/rejected": -2.3103065490722656, "logps/chosen": -1.8237838745117188, "logps/rejected": 0.0, "loss": 0.5196, "rewards/accuracies": 1.0, "rewards/chosen": 0.544334888458252, "rewards/margins": 0.544334888458252, "rewards/rejected": 0.0, "step": 1293 }, { "epoch": 2.34, "learning_rate": 1.216896533234042e-08, "logits/chosen": -2.448251962661743, "logits/rejected": -2.4508488178253174, "logps/chosen": -3.900604248046875, "logps/rejected": -10.551332473754883, "loss": 0.7087, "rewards/accuracies": 0.0, "rewards/chosen": 0.8363056182861328, "rewards/margins": -0.11918014287948608, "rewards/rejected": 0.9554857611656189, "step": 1294 }, { "epoch": 2.34, "learning_rate": 1.2105204572901411e-08, "logits/chosen": -2.2585220336914062, "logits/rejected": -2.2594921588897705, "logps/chosen": -2.0044071674346924, "logps/rejected": -1.605057716369629, "loss": 0.5431, "rewards/accuracies": 1.0, "rewards/chosen": 0.5441476106643677, "rewards/margins": 0.03359347581863403, "rewards/rejected": 0.5105541348457336, "step": 1295 }, { "epoch": 2.34, "learning_rate": 1.2041588279940757e-08, "logits/chosen": -2.304229497909546, "logits/rejected": -2.3256120681762695, "logps/chosen": -7.871979713439941, "logps/rejected": -13.091161727905273, "loss": 0.7081, "rewards/accuracies": 1.0, "rewards/chosen": 0.5332457423210144, "rewards/margins": 0.04581153392791748, "rewards/rejected": 0.4874342083930969, "step": 1296 }, { "epoch": 2.35, "learning_rate": 1.1978116695983071e-08, "logits/chosen": -2.340343475341797, "logits/rejected": -2.339716672897339, "logps/chosen": -2.5820960998535156, "logps/rejected": -3.765941619873047, "loss": 0.5894, "rewards/accuracies": 1.0, "rewards/chosen": 0.7434444427490234, "rewards/margins": 0.1639440655708313, "rewards/rejected": 0.5795003771781921, "step": 1297 }, { "epoch": 2.35, "learning_rate": 1.1914790063001356e-08, "logits/chosen": -2.3381130695343018, "logits/rejected": -2.3403337001800537, "logps/chosen": -3.6389710903167725, "logps/rejected": -3.4643542766571045, "loss": 0.6188, "rewards/accuracies": 1.0, "rewards/chosen": 0.6806239485740662, "rewards/margins": 0.21245768666267395, "rewards/rejected": 0.4681662619113922, "step": 1298 }, { "epoch": 2.35, "learning_rate": 1.1851608622415948e-08, "logits/chosen": -2.3121461868286133, "logits/rejected": -2.3165533542633057, "logps/chosen": -1.7159156799316406, "logps/rejected": -2.7430968284606934, "loss": 0.4859, "rewards/accuracies": 1.0, "rewards/chosen": 1.0131536722183228, "rewards/margins": 0.4256114363670349, "rewards/rejected": 0.5875422358512878, "step": 1299 }, { "epoch": 2.35, "learning_rate": 1.178857261509374e-08, "logits/chosen": -2.435349464416504, "logits/rejected": -2.45926570892334, "logps/chosen": -3.452927827835083, "logps/rejected": -48.70199203491211, "loss": 0.3892, "rewards/accuracies": 1.0, "rewards/chosen": 0.6127010583877563, "rewards/margins": 1.3137524127960205, "rewards/rejected": -0.7010513544082642, "step": 1300 }, { "epoch": 2.35, "learning_rate": 1.1725682281347132e-08, "logits/chosen": -2.287363290786743, "logits/rejected": -2.453395128250122, "logps/chosen": -1.8056334257125854, "logps/rejected": -47.062252044677734, "loss": 0.7205, "rewards/accuracies": 1.0, "rewards/chosen": 0.7770973443984985, "rewards/margins": 0.11171454191207886, "rewards/rejected": 0.6653828024864197, "step": 1301 }, { "epoch": 2.35, "learning_rate": 1.1662937860933198e-08, "logits/chosen": -2.3909080028533936, "logits/rejected": -2.385406255722046, "logps/chosen": -1.7890385389328003, "logps/rejected": -3.981551170349121, "loss": 0.5975, "rewards/accuracies": 1.0, "rewards/chosen": 0.7996954321861267, "rewards/margins": 0.2691580057144165, "rewards/rejected": 0.5305374264717102, "step": 1302 }, { "epoch": 2.36, "learning_rate": 1.1600339593052722e-08, "logits/chosen": -2.1558871269226074, "logits/rejected": -2.156576633453369, "logps/chosen": -3.437417984008789, "logps/rejected": -1.4122953414916992, "loss": 0.6146, "rewards/accuracies": 1.0, "rewards/chosen": 0.6747340559959412, "rewards/margins": 0.08889037370681763, "rewards/rejected": 0.5858436822891235, "step": 1303 }, { "epoch": 2.36, "learning_rate": 1.1537887716349337e-08, "logits/chosen": -2.3190343379974365, "logits/rejected": -2.319823980331421, "logps/chosen": -1.7057538032531738, "logps/rejected": -1.6541953086853027, "loss": 0.5735, "rewards/accuracies": 1.0, "rewards/chosen": 0.5929873585700989, "rewards/margins": 0.1176190972328186, "rewards/rejected": 0.4753682613372803, "step": 1304 }, { "epoch": 2.36, "learning_rate": 1.14755824689086e-08, "logits/chosen": -2.298732280731201, "logits/rejected": -2.2921459674835205, "logps/chosen": -0.43236204981803894, "logps/rejected": -2.5818722248077393, "loss": 0.6198, "rewards/accuracies": 0.0, "rewards/chosen": 0.6902967095375061, "rewards/margins": -0.003361821174621582, "rewards/rejected": 0.6936585307121277, "step": 1305 }, { "epoch": 2.36, "learning_rate": 1.1413424088257034e-08, "logits/chosen": -2.3597981929779053, "logits/rejected": -2.3495779037475586, "logps/chosen": -3.5348944664001465, "logps/rejected": -5.780115604400635, "loss": 0.6502, "rewards/accuracies": 0.0, "rewards/chosen": 0.8010149002075195, "rewards/margins": -0.14894503355026245, "rewards/rejected": 0.949959933757782, "step": 1306 }, { "epoch": 2.36, "learning_rate": 1.1351412811361282e-08, "logits/chosen": -2.2704050540924072, "logits/rejected": -2.273538112640381, "logps/chosen": -0.738353431224823, "logps/rejected": -3.1727497577667236, "loss": 0.6579, "rewards/accuracies": 1.0, "rewards/chosen": 0.823678195476532, "rewards/margins": 0.31710660457611084, "rewards/rejected": 0.5065715909004211, "step": 1307 }, { "epoch": 2.37, "learning_rate": 1.128954887462717e-08, "logits/chosen": -2.3366949558258057, "logits/rejected": -2.3590545654296875, "logps/chosen": -2.3209681510925293, "logps/rejected": -23.12476921081543, "loss": 0.7066, "rewards/accuracies": 1.0, "rewards/chosen": 0.5942480564117432, "rewards/margins": 0.14999070763587952, "rewards/rejected": 0.44425734877586365, "step": 1308 }, { "epoch": 2.37, "learning_rate": 1.1227832513898849e-08, "logits/chosen": -2.271655559539795, "logits/rejected": -2.268975257873535, "logps/chosen": -0.6580790877342224, "logps/rejected": -7.342036247253418, "loss": 0.6161, "rewards/accuracies": 0.0, "rewards/chosen": 0.5276098251342773, "rewards/margins": -0.18171930313110352, "rewards/rejected": 0.7093291282653809, "step": 1309 }, { "epoch": 2.37, "learning_rate": 1.1166263964457818e-08, "logits/chosen": -2.343324661254883, "logits/rejected": -2.3409485816955566, "logps/chosen": -2.9414114952087402, "logps/rejected": -0.45086443424224854, "loss": 0.629, "rewards/accuracies": 0.0, "rewards/chosen": 0.5027648210525513, "rewards/margins": -0.046291887760162354, "rewards/rejected": 0.5490567088127136, "step": 1310 }, { "epoch": 2.37, "learning_rate": 1.1104843461022128e-08, "logits/chosen": -2.2484025955200195, "logits/rejected": -2.243713140487671, "logps/chosen": -5.418728828430176, "logps/rejected": -5.980016708374023, "loss": 0.6144, "rewards/accuracies": 1.0, "rewards/chosen": 0.6616159677505493, "rewards/margins": 0.10154610872268677, "rewards/rejected": 0.5600698590278625, "step": 1311 }, { "epoch": 2.37, "learning_rate": 1.1043571237745386e-08, "logits/chosen": -2.2515547275543213, "logits/rejected": -2.3223235607147217, "logps/chosen": -1.3220921754837036, "logps/rejected": -26.717166900634766, "loss": 0.4991, "rewards/accuracies": 1.0, "rewards/chosen": 0.7419081926345825, "rewards/margins": 0.0526118278503418, "rewards/rejected": 0.6892963647842407, "step": 1312 }, { "epoch": 2.37, "learning_rate": 1.0982447528215955e-08, "logits/chosen": -2.4895365238189697, "logits/rejected": -2.4713680744171143, "logps/chosen": -1.622513771057129, "logps/rejected": 0.0, "loss": 0.5254, "rewards/accuracies": 1.0, "rewards/chosen": 0.8441370129585266, "rewards/margins": 0.8441370129585266, "rewards/rejected": 0.0, "step": 1313 }, { "epoch": 2.38, "learning_rate": 1.092147256545598e-08, "logits/chosen": -2.2412259578704834, "logits/rejected": -2.2436938285827637, "logps/chosen": -3.8787569999694824, "logps/rejected": -3.369497060775757, "loss": 0.5752, "rewards/accuracies": 1.0, "rewards/chosen": 0.8465806841850281, "rewards/margins": 0.3316417336463928, "rewards/rejected": 0.5149389505386353, "step": 1314 }, { "epoch": 2.38, "learning_rate": 1.0860646581920552e-08, "logits/chosen": -2.4132909774780273, "logits/rejected": -2.4096696376800537, "logps/chosen": -1.2571500539779663, "logps/rejected": -22.08966827392578, "loss": 0.4962, "rewards/accuracies": 1.0, "rewards/chosen": 0.683643102645874, "rewards/margins": 0.7936910390853882, "rewards/rejected": -0.11004791408777237, "step": 1315 }, { "epoch": 2.38, "learning_rate": 1.0799969809496789e-08, "logits/chosen": -2.170821189880371, "logits/rejected": -2.1451406478881836, "logps/chosen": -5.772161483764648, "logps/rejected": 0.0, "loss": 0.6914, "rewards/accuracies": 1.0, "rewards/chosen": 1.0105470418930054, "rewards/margins": 1.0105470418930054, "rewards/rejected": 0.0, "step": 1316 }, { "epoch": 2.38, "learning_rate": 1.0739442479503018e-08, "logits/chosen": -2.2844903469085693, "logits/rejected": -2.2800605297088623, "logps/chosen": -1.5259302854537964, "logps/rejected": -2.0562803745269775, "loss": 0.6838, "rewards/accuracies": 0.0, "rewards/chosen": 0.6187611818313599, "rewards/margins": -0.15061575174331665, "rewards/rejected": 0.7693769335746765, "step": 1317 }, { "epoch": 2.38, "learning_rate": 1.0679064822687822e-08, "logits/chosen": -2.386683702468872, "logits/rejected": -2.387014865875244, "logps/chosen": -1.9348869323730469, "logps/rejected": -5.085967540740967, "loss": 0.4547, "rewards/accuracies": 1.0, "rewards/chosen": 0.8002292513847351, "rewards/margins": 0.4332055449485779, "rewards/rejected": 0.3670237064361572, "step": 1318 }, { "epoch": 2.39, "learning_rate": 1.0618837069229164e-08, "logits/chosen": -2.3664019107818604, "logits/rejected": -2.3638200759887695, "logps/chosen": -2.3749256134033203, "logps/rejected": -2.2315168380737305, "loss": 0.7354, "rewards/accuracies": 1.0, "rewards/chosen": 0.7741823196411133, "rewards/margins": 0.11801296472549438, "rewards/rejected": 0.6561693549156189, "step": 1319 }, { "epoch": 2.39, "learning_rate": 1.0558759448733556e-08, "logits/chosen": -2.216423511505127, "logits/rejected": -2.234389305114746, "logps/chosen": -2.568439245223999, "logps/rejected": -10.973273277282715, "loss": 0.7975, "rewards/accuracies": 0.0, "rewards/chosen": 0.6537632346153259, "rewards/margins": -0.21110773086547852, "rewards/rejected": 0.8648709654808044, "step": 1320 }, { "epoch": 2.39, "learning_rate": 1.049883219023513e-08, "logits/chosen": -2.2330687046051025, "logits/rejected": -2.202688217163086, "logps/chosen": -1.452302098274231, "logps/rejected": 0.0, "loss": 0.5668, "rewards/accuracies": 1.0, "rewards/chosen": 0.6128747463226318, "rewards/margins": 0.6128747463226318, "rewards/rejected": 0.0, "step": 1321 }, { "epoch": 2.39, "learning_rate": 1.0439055522194824e-08, "logits/chosen": -2.3600635528564453, "logits/rejected": -2.3581464290618896, "logps/chosen": -1.3793628215789795, "logps/rejected": -5.360290050506592, "loss": 0.6221, "rewards/accuracies": 1.0, "rewards/chosen": 0.730623185634613, "rewards/margins": 0.33879610896110535, "rewards/rejected": 0.3918270766735077, "step": 1322 }, { "epoch": 2.39, "learning_rate": 1.0379429672499478e-08, "logits/chosen": -2.195720672607422, "logits/rejected": -2.1914243698120117, "logps/chosen": -1.5083603858947754, "logps/rejected": -4.434972286224365, "loss": 0.7831, "rewards/accuracies": 1.0, "rewards/chosen": 0.516979992389679, "rewards/margins": 0.15261682868003845, "rewards/rejected": 0.3643631637096405, "step": 1323 }, { "epoch": 2.39, "learning_rate": 1.0319954868460945e-08, "logits/chosen": -2.2397804260253906, "logits/rejected": -2.247136354446411, "logps/chosen": -1.9045417308807373, "logps/rejected": -4.976585388183594, "loss": 0.8056, "rewards/accuracies": 1.0, "rewards/chosen": 0.6926304697990417, "rewards/margins": 0.20046347379684448, "rewards/rejected": 0.49216699600219727, "step": 1324 }, { "epoch": 2.4, "learning_rate": 1.0260631336815235e-08, "logits/chosen": -2.2779672145843506, "logits/rejected": -2.425495147705078, "logps/chosen": -1.1246916055679321, "logps/rejected": -22.995975494384766, "loss": 0.4892, "rewards/accuracies": 1.0, "rewards/chosen": 0.7776076793670654, "rewards/margins": 0.3564979135990143, "rewards/rejected": 0.42110976576805115, "step": 1325 }, { "epoch": 2.4, "learning_rate": 1.0201459303721716e-08, "logits/chosen": -2.319216728210449, "logits/rejected": -2.3189659118652344, "logps/chosen": -0.7648289799690247, "logps/rejected": -6.492816925048828, "loss": 0.6687, "rewards/accuracies": 0.0, "rewards/chosen": 0.7939816117286682, "rewards/margins": -0.08411681652069092, "rewards/rejected": 0.8780984282493591, "step": 1326 }, { "epoch": 2.4, "learning_rate": 1.0142438994762142e-08, "logits/chosen": -2.237481117248535, "logits/rejected": -2.237215518951416, "logps/chosen": -2.186765432357788, "logps/rejected": -3.3868532180786133, "loss": 0.5134, "rewards/accuracies": 1.0, "rewards/chosen": 0.6661258935928345, "rewards/margins": 0.16574281454086304, "rewards/rejected": 0.5003830790519714, "step": 1327 }, { "epoch": 2.4, "learning_rate": 1.0083570634939864e-08, "logits/chosen": -2.3247034549713135, "logits/rejected": -2.332362651824951, "logps/chosen": -1.8399832248687744, "logps/rejected": -13.24703598022461, "loss": 0.5234, "rewards/accuracies": 0.0, "rewards/chosen": 0.6603630185127258, "rewards/margins": -0.16180533170700073, "rewards/rejected": 0.8221683502197266, "step": 1328 }, { "epoch": 2.4, "learning_rate": 1.0024854448678987e-08, "logits/chosen": -2.358548641204834, "logits/rejected": -2.357459306716919, "logps/chosen": -2.8238396644592285, "logps/rejected": -1.766852617263794, "loss": 0.7631, "rewards/accuracies": 1.0, "rewards/chosen": 0.5968909859657288, "rewards/margins": 0.06586325168609619, "rewards/rejected": 0.5310277342796326, "step": 1329 }, { "epoch": 2.41, "learning_rate": 9.966290659823434e-09, "logits/chosen": -2.338048219680786, "logits/rejected": -2.415447235107422, "logps/chosen": -2.3840136528015137, "logps/rejected": -37.43931579589844, "loss": 0.5651, "rewards/accuracies": 1.0, "rewards/chosen": 0.5074375867843628, "rewards/margins": 0.4145413041114807, "rewards/rejected": 0.09289627522230148, "step": 1330 }, { "epoch": 2.41, "learning_rate": 9.907879491636201e-09, "logits/chosen": -2.28401780128479, "logits/rejected": -2.2741942405700684, "logps/chosen": -3.9640610218048096, "logps/rejected": -6.367861270904541, "loss": 0.5888, "rewards/accuracies": 1.0, "rewards/chosen": 0.7703648805618286, "rewards/margins": 0.43198543787002563, "rewards/rejected": 0.338379442691803, "step": 1331 }, { "epoch": 2.41, "learning_rate": 9.849621166798389e-09, "logits/chosen": -2.325424909591675, "logits/rejected": -2.28615403175354, "logps/chosen": -8.004594802856445, "logps/rejected": -1.7062957286834717, "loss": 0.5642, "rewards/accuracies": 1.0, "rewards/chosen": 0.9386143088340759, "rewards/margins": 0.3945889472961426, "rewards/rejected": 0.5440253615379333, "step": 1332 }, { "epoch": 2.41, "learning_rate": 9.791515907408448e-09, "logits/chosen": -2.3895626068115234, "logits/rejected": -2.4830825328826904, "logps/chosen": -6.1461968421936035, "logps/rejected": -25.717992782592773, "loss": 0.7836, "rewards/accuracies": 0.0, "rewards/chosen": 0.6087517142295837, "rewards/margins": -0.3554506301879883, "rewards/rejected": 0.964202344417572, "step": 1333 }, { "epoch": 2.41, "learning_rate": 9.733563934981271e-09, "logits/chosen": -2.388291120529175, "logits/rejected": -2.401719331741333, "logps/chosen": -5.804018020629883, "logps/rejected": -10.419454574584961, "loss": 0.5454, "rewards/accuracies": 1.0, "rewards/chosen": 0.9509401321411133, "rewards/margins": 0.7167143821716309, "rewards/rejected": 0.23422574996948242, "step": 1334 }, { "epoch": 2.41, "learning_rate": 9.67576547044741e-09, "logits/chosen": -2.2451629638671875, "logits/rejected": -2.246558427810669, "logps/chosen": -2.8611316680908203, "logps/rejected": -10.668638229370117, "loss": 0.9762, "rewards/accuracies": 0.0, "rewards/chosen": 0.7244367599487305, "rewards/margins": -0.23665428161621094, "rewards/rejected": 0.9610910415649414, "step": 1335 }, { "epoch": 2.42, "learning_rate": 9.61812073415219e-09, "logits/chosen": -2.2654497623443604, "logits/rejected": -2.267585515975952, "logps/chosen": -2.084291696548462, "logps/rejected": -4.845256805419922, "loss": 0.6785, "rewards/accuracies": 0.0, "rewards/chosen": 0.49183008074760437, "rewards/margins": -0.14981362223625183, "rewards/rejected": 0.6416437029838562, "step": 1336 }, { "epoch": 2.42, "learning_rate": 9.560629945854853e-09, "logits/chosen": -2.2355499267578125, "logits/rejected": -2.220325469970703, "logps/chosen": -2.4493064880371094, "logps/rejected": -1.9366276264190674, "loss": 0.7074, "rewards/accuracies": 1.0, "rewards/chosen": 0.7349319458007812, "rewards/margins": 0.14286738634109497, "rewards/rejected": 0.5920645594596863, "step": 1337 }, { "epoch": 2.42, "learning_rate": 9.503293324727745e-09, "logits/chosen": -2.402966260910034, "logits/rejected": -2.4047164916992188, "logps/chosen": -1.4887175559997559, "logps/rejected": -2.112635374069214, "loss": 0.5518, "rewards/accuracies": 1.0, "rewards/chosen": 0.6132884621620178, "rewards/margins": 0.08474582433700562, "rewards/rejected": 0.5285426378250122, "step": 1338 }, { "epoch": 2.42, "learning_rate": 9.446111089355519e-09, "logits/chosen": -2.3371427059173584, "logits/rejected": -2.3405332565307617, "logps/chosen": -0.9877125024795532, "logps/rejected": -2.372749090194702, "loss": 0.6271, "rewards/accuracies": 1.0, "rewards/chosen": 0.809417724609375, "rewards/margins": 0.3050804138183594, "rewards/rejected": 0.5043373107910156, "step": 1339 }, { "epoch": 2.42, "learning_rate": 9.389083457734231e-09, "logits/chosen": -2.267364978790283, "logits/rejected": -2.26534366607666, "logps/chosen": -2.8922226428985596, "logps/rejected": -4.06390380859375, "loss": 0.5948, "rewards/accuracies": 1.0, "rewards/chosen": 0.7991015315055847, "rewards/margins": 0.08814418315887451, "rewards/rejected": 0.7109573483467102, "step": 1340 }, { "epoch": 2.42, "learning_rate": 9.332210647270521e-09, "logits/chosen": -2.257215738296509, "logits/rejected": -2.2501981258392334, "logps/chosen": -0.9928908944129944, "logps/rejected": -7.664509296417236, "loss": 0.5396, "rewards/accuracies": 1.0, "rewards/chosen": 0.8560024499893188, "rewards/margins": 0.6464782357215881, "rewards/rejected": 0.20952419936656952, "step": 1341 }, { "epoch": 2.43, "learning_rate": 9.275492874780855e-09, "logits/chosen": -2.326620101928711, "logits/rejected": -2.2961275577545166, "logps/chosen": -2.7620577812194824, "logps/rejected": 0.0, "loss": 0.5199, "rewards/accuracies": 1.0, "rewards/chosen": 0.6913549900054932, "rewards/margins": 0.6913549900054932, "rewards/rejected": 0.0, "step": 1342 }, { "epoch": 2.43, "learning_rate": 9.218930356490623e-09, "logits/chosen": -2.3477272987365723, "logits/rejected": -2.351583242416382, "logps/chosen": -2.0789551734924316, "logps/rejected": -5.107908248901367, "loss": 0.7584, "rewards/accuracies": 0.0, "rewards/chosen": 0.8190904855728149, "rewards/margins": -0.3296794891357422, "rewards/rejected": 1.1487699747085571, "step": 1343 }, { "epoch": 2.43, "learning_rate": 9.162523308033333e-09, "logits/chosen": -2.321592092514038, "logits/rejected": -2.3388891220092773, "logps/chosen": -4.216648578643799, "logps/rejected": -10.667280197143555, "loss": 0.5765, "rewards/accuracies": 1.0, "rewards/chosen": 0.6669884324073792, "rewards/margins": 0.4122789800167084, "rewards/rejected": 0.2547094523906708, "step": 1344 }, { "epoch": 2.43, "learning_rate": 9.106271944449789e-09, "logits/chosen": -2.3227181434631348, "logits/rejected": -2.319148540496826, "logps/chosen": -4.8863749504089355, "logps/rejected": -2.6493732929229736, "loss": 0.4843, "rewards/accuracies": 1.0, "rewards/chosen": 0.8411253094673157, "rewards/margins": 0.4234675467014313, "rewards/rejected": 0.4176577627658844, "step": 1345 }, { "epoch": 2.43, "learning_rate": 9.050176480187282e-09, "logits/chosen": -2.3481860160827637, "logits/rejected": -2.3548991680145264, "logps/chosen": -2.0876851081848145, "logps/rejected": -2.493318557739258, "loss": 0.622, "rewards/accuracies": 0.0, "rewards/chosen": 0.42213496565818787, "rewards/margins": -0.07507267594337463, "rewards/rejected": 0.4972076416015625, "step": 1346 }, { "epoch": 2.44, "learning_rate": 8.994237129098787e-09, "logits/chosen": -2.4014275074005127, "logits/rejected": -2.389920473098755, "logps/chosen": -3.097116708755493, "logps/rejected": -3.595155715942383, "loss": 0.6014, "rewards/accuracies": 1.0, "rewards/chosen": 0.707036018371582, "rewards/margins": 0.21198788285255432, "rewards/rejected": 0.4950481355190277, "step": 1347 }, { "epoch": 2.44, "learning_rate": 8.938454104442117e-09, "logits/chosen": -2.2275755405426025, "logits/rejected": -2.2182717323303223, "logps/chosen": -2.6094276905059814, "logps/rejected": -6.9546637535095215, "loss": 0.6581, "rewards/accuracies": 0.0, "rewards/chosen": 0.5877529382705688, "rewards/margins": -0.19057780504226685, "rewards/rejected": 0.7783307433128357, "step": 1348 }, { "epoch": 2.44, "learning_rate": 8.882827618879108e-09, "logits/chosen": -2.497283458709717, "logits/rejected": -2.510815143585205, "logps/chosen": -2.0154685974121094, "logps/rejected": -18.515066146850586, "loss": 0.6482, "rewards/accuracies": 0.0, "rewards/chosen": 0.7435125708580017, "rewards/margins": -0.02852761745452881, "rewards/rejected": 0.7720401883125305, "step": 1349 }, { "epoch": 2.44, "learning_rate": 8.82735788447483e-09, "logits/chosen": -2.273089647293091, "logits/rejected": -2.273387908935547, "logps/chosen": -9.900066375732422, "logps/rejected": -5.120800971984863, "loss": 0.7996, "rewards/accuracies": 0.0, "rewards/chosen": 0.7461105585098267, "rewards/margins": -0.04170882701873779, "rewards/rejected": 0.7878193855285645, "step": 1350 }, { "epoch": 2.44, "learning_rate": 8.772045112696763e-09, "logits/chosen": -2.3214073181152344, "logits/rejected": -2.360323190689087, "logps/chosen": -1.2976329326629639, "logps/rejected": -8.188507080078125, "loss": 0.8093, "rewards/accuracies": 1.0, "rewards/chosen": 0.853054940700531, "rewards/margins": 0.12652122974395752, "rewards/rejected": 0.7265337109565735, "step": 1351 }, { "epoch": 2.44, "learning_rate": 8.716889514414022e-09, "logits/chosen": -2.3046011924743652, "logits/rejected": -2.3019325733184814, "logps/chosen": -4.707319259643555, "logps/rejected": -2.4579384326934814, "loss": 0.5138, "rewards/accuracies": 1.0, "rewards/chosen": 0.7649282813072205, "rewards/margins": 0.33873799443244934, "rewards/rejected": 0.4261902868747711, "step": 1352 }, { "epoch": 2.45, "learning_rate": 8.661891299896495e-09, "logits/chosen": -2.353740692138672, "logits/rejected": -2.378084659576416, "logps/chosen": -1.8246997594833374, "logps/rejected": -5.28300666809082, "loss": 0.6315, "rewards/accuracies": 1.0, "rewards/chosen": 0.9266170859336853, "rewards/margins": 0.08388590812683105, "rewards/rejected": 0.8427311778068542, "step": 1353 }, { "epoch": 2.45, "learning_rate": 8.607050678814121e-09, "logits/chosen": -2.398810625076294, "logits/rejected": -2.395275831222534, "logps/chosen": -2.9594109058380127, "logps/rejected": -3.4396023750305176, "loss": 0.6305, "rewards/accuracies": 0.0, "rewards/chosen": 0.30317172408103943, "rewards/margins": -0.20317980647087097, "rewards/rejected": 0.5063515305519104, "step": 1354 }, { "epoch": 2.45, "learning_rate": 8.552367860235976e-09, "logits/chosen": -2.336461305618286, "logits/rejected": -2.334660768508911, "logps/chosen": -4.219721794128418, "logps/rejected": -2.804283618927002, "loss": 0.5415, "rewards/accuracies": 1.0, "rewards/chosen": 0.7002097368240356, "rewards/margins": 0.325391560792923, "rewards/rejected": 0.37481817603111267, "step": 1355 }, { "epoch": 2.45, "learning_rate": 8.497843052629622e-09, "logits/chosen": -2.2063703536987305, "logits/rejected": -2.2116661071777344, "logps/chosen": -3.0791094303131104, "logps/rejected": -3.051013231277466, "loss": 0.8161, "rewards/accuracies": 0.0, "rewards/chosen": 0.8269497752189636, "rewards/margins": -0.08700823783874512, "rewards/rejected": 0.9139580130577087, "step": 1356 }, { "epoch": 2.45, "learning_rate": 8.443476463860189e-09, "logits/chosen": -2.217726230621338, "logits/rejected": -2.20607328414917, "logps/chosen": -8.666420936584473, "logps/rejected": -1.6478073596954346, "loss": 0.7262, "rewards/accuracies": 1.0, "rewards/chosen": 0.9158320426940918, "rewards/margins": 0.3114768862724304, "rewards/rejected": 0.6043551564216614, "step": 1357 }, { "epoch": 2.46, "learning_rate": 8.389268301189627e-09, "logits/chosen": -2.3246939182281494, "logits/rejected": -2.3734374046325684, "logps/chosen": -2.0469446182250977, "logps/rejected": -19.73438835144043, "loss": 0.5377, "rewards/accuracies": 1.0, "rewards/chosen": 0.6696684956550598, "rewards/margins": 0.34295663237571716, "rewards/rejected": 0.32671186327934265, "step": 1358 }, { "epoch": 2.46, "learning_rate": 8.33521877127592e-09, "logits/chosen": -2.3668346405029297, "logits/rejected": -2.3742682933807373, "logps/chosen": -3.3458640575408936, "logps/rejected": -5.4447197914123535, "loss": 0.7247, "rewards/accuracies": 1.0, "rewards/chosen": 0.7458785176277161, "rewards/margins": 0.2667100429534912, "rewards/rejected": 0.47916847467422485, "step": 1359 }, { "epoch": 2.46, "learning_rate": 8.281328080172322e-09, "logits/chosen": -2.403768301010132, "logits/rejected": -2.405622959136963, "logps/chosen": -2.03537917137146, "logps/rejected": -2.108203649520874, "loss": 0.5826, "rewards/accuracies": 1.0, "rewards/chosen": 0.7831409573554993, "rewards/margins": 0.058040738105773926, "rewards/rejected": 0.7251002192497253, "step": 1360 }, { "epoch": 2.46, "learning_rate": 8.227596433326539e-09, "logits/chosen": -2.3618226051330566, "logits/rejected": -2.3671586513519287, "logps/chosen": -1.4061698913574219, "logps/rejected": -2.111078977584839, "loss": 0.5831, "rewards/accuracies": 1.0, "rewards/chosen": 0.7063536643981934, "rewards/margins": 0.19932252168655396, "rewards/rejected": 0.5070311427116394, "step": 1361 }, { "epoch": 2.46, "learning_rate": 8.174024035579924e-09, "logits/chosen": -2.3024067878723145, "logits/rejected": -2.299494981765747, "logps/chosen": -2.026487350463867, "logps/rejected": -6.336925983428955, "loss": 0.5754, "rewards/accuracies": 1.0, "rewards/chosen": 0.6457405090332031, "rewards/margins": 0.26158109307289124, "rewards/rejected": 0.3841594159603119, "step": 1362 }, { "epoch": 2.46, "learning_rate": 8.120611091166746e-09, "logits/chosen": -2.3345632553100586, "logits/rejected": -2.395716667175293, "logps/chosen": -2.8523364067077637, "logps/rejected": -15.000792503356934, "loss": 0.8206, "rewards/accuracies": 0.0, "rewards/chosen": 0.44847702980041504, "rewards/margins": -0.4310072660446167, "rewards/rejected": 0.8794842958450317, "step": 1363 }, { "epoch": 2.47, "learning_rate": 8.067357803713365e-09, "logits/chosen": -2.3096747398376465, "logits/rejected": -2.2414112091064453, "logps/chosen": -23.22421646118164, "logps/rejected": -6.450135707855225, "loss": 0.9995, "rewards/accuracies": 0.0, "rewards/chosen": 0.15171051025390625, "rewards/margins": -0.4955233335494995, "rewards/rejected": 0.6472338438034058, "step": 1364 }, { "epoch": 2.47, "learning_rate": 8.014264376237528e-09, "logits/chosen": -2.3242576122283936, "logits/rejected": -2.3371684551239014, "logps/chosen": -1.5894083976745605, "logps/rejected": -7.724806785583496, "loss": 0.5792, "rewards/accuracies": 1.0, "rewards/chosen": 0.7434093952178955, "rewards/margins": 0.20364004373550415, "rewards/rejected": 0.5397693514823914, "step": 1365 }, { "epoch": 2.47, "learning_rate": 7.961331011147493e-09, "logits/chosen": -2.3840787410736084, "logits/rejected": -2.358928680419922, "logps/chosen": -16.854339599609375, "logps/rejected": -2.684417247772217, "loss": 0.7349, "rewards/accuracies": 1.0, "rewards/chosen": 0.5817924737930298, "rewards/margins": 0.054760515689849854, "rewards/rejected": 0.5270319581031799, "step": 1366 }, { "epoch": 2.47, "learning_rate": 7.908557910241359e-09, "logits/chosen": -2.3291361331939697, "logits/rejected": -2.328510046005249, "logps/chosen": -0.9630688428878784, "logps/rejected": -2.4728970527648926, "loss": 0.7123, "rewards/accuracies": 1.0, "rewards/chosen": 0.4851514995098114, "rewards/margins": 0.04358947277069092, "rewards/rejected": 0.4415620267391205, "step": 1367 }, { "epoch": 2.47, "learning_rate": 7.855945274706205e-09, "logits/chosen": -2.2726893424987793, "logits/rejected": -2.267235040664673, "logps/chosen": -2.1474223136901855, "logps/rejected": -1.9549089670181274, "loss": 0.6403, "rewards/accuracies": 0.0, "rewards/chosen": 0.5350881218910217, "rewards/margins": -0.08400821685791016, "rewards/rejected": 0.6190963387489319, "step": 1368 }, { "epoch": 2.48, "learning_rate": 7.80349330511742e-09, "logits/chosen": -2.365004539489746, "logits/rejected": -2.370847702026367, "logps/chosen": -2.6146833896636963, "logps/rejected": -4.847249984741211, "loss": 0.5482, "rewards/accuracies": 1.0, "rewards/chosen": 0.8160595297813416, "rewards/margins": 0.12123990058898926, "rewards/rejected": 0.6948196291923523, "step": 1369 }, { "epoch": 2.48, "learning_rate": 7.75120220143783e-09, "logits/chosen": -2.2889370918273926, "logits/rejected": -2.4352762699127197, "logps/chosen": -1.3860167264938354, "logps/rejected": -28.214733123779297, "loss": 0.4344, "rewards/accuracies": 1.0, "rewards/chosen": 0.8504924178123474, "rewards/margins": 0.5985652208328247, "rewards/rejected": 0.2519271969795227, "step": 1370 }, { "epoch": 2.48, "learning_rate": 7.699072163017011e-09, "logits/chosen": -2.201268196105957, "logits/rejected": -2.223623037338257, "logps/chosen": -0.9586105942726135, "logps/rejected": -14.145647048950195, "loss": 0.8514, "rewards/accuracies": 0.0, "rewards/chosen": 0.3939514756202698, "rewards/margins": -0.4344702959060669, "rewards/rejected": 0.8284217715263367, "step": 1371 }, { "epoch": 2.48, "learning_rate": 7.647103388590543e-09, "logits/chosen": -2.344251871109009, "logits/rejected": -2.345160722732544, "logps/chosen": -3.072357177734375, "logps/rejected": -4.200236797332764, "loss": 0.552, "rewards/accuracies": 1.0, "rewards/chosen": 0.6590787768363953, "rewards/margins": 0.19483539462089539, "rewards/rejected": 0.4642433822154999, "step": 1372 }, { "epoch": 2.48, "learning_rate": 7.595296076279156e-09, "logits/chosen": -2.335524559020996, "logits/rejected": -2.447587251663208, "logps/chosen": -2.2244551181793213, "logps/rejected": -23.33386993408203, "loss": 0.5149, "rewards/accuracies": 1.0, "rewards/chosen": 0.6343590021133423, "rewards/margins": 0.52346271276474, "rewards/rejected": 0.11089630424976349, "step": 1373 }, { "epoch": 2.48, "learning_rate": 7.543650423588105e-09, "logits/chosen": -2.2881641387939453, "logits/rejected": -2.28798246383667, "logps/chosen": -3.569453001022339, "logps/rejected": -3.5138583183288574, "loss": 0.5407, "rewards/accuracies": 1.0, "rewards/chosen": 0.8043689131736755, "rewards/margins": 0.293984591960907, "rewards/rejected": 0.5103843212127686, "step": 1374 }, { "epoch": 2.49, "learning_rate": 7.492166627406299e-09, "logits/chosen": -2.3316328525543213, "logits/rejected": -2.448969602584839, "logps/chosen": -4.071536064147949, "logps/rejected": -30.670509338378906, "loss": 0.5424, "rewards/accuracies": 1.0, "rewards/chosen": 0.796592652797699, "rewards/margins": 0.382316917181015, "rewards/rejected": 0.41427573561668396, "step": 1375 }, { "epoch": 2.49, "learning_rate": 7.440844884005615e-09, "logits/chosen": -2.461918354034424, "logits/rejected": -2.4213335514068604, "logps/chosen": -5.468845367431641, "logps/rejected": -2.2280240058898926, "loss": 0.5413, "rewards/accuracies": 1.0, "rewards/chosen": 0.9522693753242493, "rewards/margins": 0.3316139578819275, "rewards/rejected": 0.6206554174423218, "step": 1376 }, { "epoch": 2.49, "learning_rate": 7.389685389040129e-09, "logits/chosen": -2.3205068111419678, "logits/rejected": -2.328852653503418, "logps/chosen": -4.87803840637207, "logps/rejected": -26.399127960205078, "loss": 0.6016, "rewards/accuracies": 1.0, "rewards/chosen": 0.6889112591743469, "rewards/margins": 0.2272481918334961, "rewards/rejected": 0.46166306734085083, "step": 1377 }, { "epoch": 2.49, "learning_rate": 7.3386883375454016e-09, "logits/chosen": -2.290753126144409, "logits/rejected": -2.287414073944092, "logps/chosen": -1.2520242929458618, "logps/rejected": -6.238842010498047, "loss": 0.714, "rewards/accuracies": 0.0, "rewards/chosen": 0.5642870664596558, "rewards/margins": -0.22329628467559814, "rewards/rejected": 0.7875833511352539, "step": 1378 }, { "epoch": 2.49, "learning_rate": 7.287853923937698e-09, "logits/chosen": -2.376166582107544, "logits/rejected": -2.3853349685668945, "logps/chosen": -2.0470736026763916, "logps/rejected": -1.711052417755127, "loss": 0.5483, "rewards/accuracies": 1.0, "rewards/chosen": 0.636406421661377, "rewards/margins": 0.19918283820152283, "rewards/rejected": 0.4372235834598541, "step": 1379 }, { "epoch": 2.5, "learning_rate": 7.2371823420132485e-09, "logits/chosen": -2.3435893058776855, "logits/rejected": -2.3159339427948, "logps/chosen": -1.445786476135254, "logps/rejected": 0.0, "loss": 0.5771, "rewards/accuracies": 1.0, "rewards/chosen": 0.821898877620697, "rewards/margins": 0.821898877620697, "rewards/rejected": 0.0, "step": 1380 }, { "epoch": 2.5, "learning_rate": 7.186673784947511e-09, "logits/chosen": -2.2801830768585205, "logits/rejected": -2.2844104766845703, "logps/chosen": -0.7425472736358643, "logps/rejected": -8.98974895477295, "loss": 0.9075, "rewards/accuracies": 0.0, "rewards/chosen": 0.6074879765510559, "rewards/margins": -0.354958176612854, "rewards/rejected": 0.9624461531639099, "step": 1381 }, { "epoch": 2.5, "learning_rate": 7.136328445294482e-09, "logits/chosen": -2.402958631515503, "logits/rejected": -2.3969357013702393, "logps/chosen": -6.560904502868652, "logps/rejected": -2.820111036300659, "loss": 0.6176, "rewards/accuracies": 1.0, "rewards/chosen": 1.03275728225708, "rewards/margins": 0.2660290598869324, "rewards/rejected": 0.7667282223701477, "step": 1382 }, { "epoch": 2.5, "learning_rate": 7.08614651498588e-09, "logits/chosen": -2.2796599864959717, "logits/rejected": -2.3745992183685303, "logps/chosen": -1.8953107595443726, "logps/rejected": -28.078533172607422, "loss": 0.6461, "rewards/accuracies": 0.0, "rewards/chosen": 0.5924578309059143, "rewards/margins": -0.15839171409606934, "rewards/rejected": 0.7508495450019836, "step": 1383 }, { "epoch": 2.5, "learning_rate": 7.036128185330476e-09, "logits/chosen": -2.368999481201172, "logits/rejected": -2.3510098457336426, "logps/chosen": -3.819995880126953, "logps/rejected": 0.0, "loss": 0.4079, "rewards/accuracies": 1.0, "rewards/chosen": 0.77112877368927, "rewards/margins": 0.77112877368927, "rewards/rejected": 0.0, "step": 1384 }, { "epoch": 2.5, "learning_rate": 6.9862736470133635e-09, "logits/chosen": -2.3728132247924805, "logits/rejected": -2.3934741020202637, "logps/chosen": 0.0, "logps/rejected": -1.7768454551696777, "loss": 0.8754, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": -0.577136218547821, "rewards/rejected": 0.577136218547821, "step": 1385 }, { "epoch": 2.51, "learning_rate": 6.936583090095171e-09, "logits/chosen": -2.3677470684051514, "logits/rejected": -2.3589024543762207, "logps/chosen": -13.730022430419922, "logps/rejected": -2.716102123260498, "loss": 0.6252, "rewards/accuracies": 1.0, "rewards/chosen": 0.7866466641426086, "rewards/margins": 0.07043230533599854, "rewards/rejected": 0.7162143588066101, "step": 1386 }, { "epoch": 2.51, "learning_rate": 6.8870567040114295e-09, "logits/chosen": -2.232022523880005, "logits/rejected": -2.2338814735412598, "logps/chosen": -5.092281341552734, "logps/rejected": -7.248599052429199, "loss": 0.6838, "rewards/accuracies": 1.0, "rewards/chosen": 0.5426470041275024, "rewards/margins": 0.03781759738922119, "rewards/rejected": 0.5048294067382812, "step": 1387 }, { "epoch": 2.51, "learning_rate": 6.8376946775717645e-09, "logits/chosen": -2.328669548034668, "logits/rejected": -2.336531400680542, "logps/chosen": -7.143026351928711, "logps/rejected": -3.7822210788726807, "loss": 0.656, "rewards/accuracies": 1.0, "rewards/chosen": 0.8968673944473267, "rewards/margins": 0.15453141927719116, "rewards/rejected": 0.7423359751701355, "step": 1388 }, { "epoch": 2.51, "learning_rate": 6.7884971989592254e-09, "logits/chosen": -2.3830463886260986, "logits/rejected": -2.410616874694824, "logps/chosen": -1.8813602924346924, "logps/rejected": -23.317012786865234, "loss": 0.7112, "rewards/accuracies": 0.0, "rewards/chosen": 0.6031274795532227, "rewards/margins": -0.15024244785308838, "rewards/rejected": 0.753369927406311, "step": 1389 }, { "epoch": 2.51, "learning_rate": 6.7394644557295434e-09, "logits/chosen": -2.292097330093384, "logits/rejected": -2.2956604957580566, "logps/chosen": -1.690659761428833, "logps/rejected": -1.2203375101089478, "loss": 0.5723, "rewards/accuracies": 0.0, "rewards/chosen": 0.5676097273826599, "rewards/margins": -0.023933827877044678, "rewards/rejected": 0.5915435552597046, "step": 1390 }, { "epoch": 2.52, "learning_rate": 6.690596634810447e-09, "logits/chosen": -2.2231359481811523, "logits/rejected": -2.227362871170044, "logps/chosen": -5.156829357147217, "logps/rejected": -3.2898991107940674, "loss": 0.547, "rewards/accuracies": 1.0, "rewards/chosen": 1.2501415014266968, "rewards/margins": 0.5091633200645447, "rewards/rejected": 0.7409781813621521, "step": 1391 }, { "epoch": 2.52, "learning_rate": 6.641893922500941e-09, "logits/chosen": -2.3363659381866455, "logits/rejected": -2.3093695640563965, "logps/chosen": -9.611724853515625, "logps/rejected": -4.5935444831848145, "loss": 0.6203, "rewards/accuracies": 1.0, "rewards/chosen": 0.9159442782402039, "rewards/margins": 0.09262502193450928, "rewards/rejected": 0.8233192563056946, "step": 1392 }, { "epoch": 2.52, "learning_rate": 6.593356504470548e-09, "logits/chosen": -2.4002015590667725, "logits/rejected": -2.3607590198516846, "logps/chosen": -18.834609985351562, "logps/rejected": -2.768991470336914, "loss": 0.6137, "rewards/accuracies": 1.0, "rewards/chosen": 0.7658737301826477, "rewards/margins": 0.18239295482635498, "rewards/rejected": 0.5834807753562927, "step": 1393 }, { "epoch": 2.52, "learning_rate": 6.544984565758654e-09, "logits/chosen": -2.3760197162628174, "logits/rejected": -2.379185676574707, "logps/chosen": -1.3787564039230347, "logps/rejected": -9.75454044342041, "loss": 0.7404, "rewards/accuracies": 0.0, "rewards/chosen": 0.6567904949188232, "rewards/margins": -0.15247035026550293, "rewards/rejected": 0.8092608451843262, "step": 1394 }, { "epoch": 2.52, "learning_rate": 6.496778290773813e-09, "logits/chosen": -2.1974947452545166, "logits/rejected": -2.2075345516204834, "logps/chosen": -1.6874476671218872, "logps/rejected": -11.899200439453125, "loss": 0.7183, "rewards/accuracies": 1.0, "rewards/chosen": 0.6082696318626404, "rewards/margins": 0.41892731189727783, "rewards/rejected": 0.18934230506420135, "step": 1395 }, { "epoch": 2.52, "learning_rate": 6.448737863292975e-09, "logits/chosen": -2.3506922721862793, "logits/rejected": -2.3492660522460938, "logps/chosen": -6.8643035888671875, "logps/rejected": -5.624839782714844, "loss": 0.5559, "rewards/accuracies": 1.0, "rewards/chosen": 1.0148853063583374, "rewards/margins": 0.5513678193092346, "rewards/rejected": 0.4635174870491028, "step": 1396 }, { "epoch": 2.53, "learning_rate": 6.400863466460871e-09, "logits/chosen": -2.354678153991699, "logits/rejected": -2.3538193702697754, "logps/chosen": -0.7443430423736572, "logps/rejected": -2.7021656036376953, "loss": 0.7162, "rewards/accuracies": 1.0, "rewards/chosen": 0.6619553565979004, "rewards/margins": 0.12108439207077026, "rewards/rejected": 0.5408709645271301, "step": 1397 }, { "epoch": 2.53, "learning_rate": 6.3531552827892465e-09, "logits/chosen": -2.3659534454345703, "logits/rejected": -2.3637146949768066, "logps/chosen": -2.3335673809051514, "logps/rejected": -3.6703219413757324, "loss": 0.5445, "rewards/accuracies": 1.0, "rewards/chosen": 0.6775109171867371, "rewards/margins": 0.3125939667224884, "rewards/rejected": 0.36491695046424866, "step": 1398 }, { "epoch": 2.53, "learning_rate": 6.305613494156181e-09, "logits/chosen": -2.315645694732666, "logits/rejected": -2.317359209060669, "logps/chosen": -2.3351614475250244, "logps/rejected": -2.5685527324676514, "loss": 0.6809, "rewards/accuracies": 0.0, "rewards/chosen": 0.6738348007202148, "rewards/margins": -0.04399919509887695, "rewards/rejected": 0.7178339958190918, "step": 1399 }, { "epoch": 2.53, "learning_rate": 6.2582382818054556e-09, "logits/chosen": -2.24926495552063, "logits/rejected": -2.3402819633483887, "logps/chosen": -2.5565295219421387, "logps/rejected": -25.926773071289062, "loss": 0.6597, "rewards/accuracies": 1.0, "rewards/chosen": 0.4458954334259033, "rewards/margins": 0.1395527422428131, "rewards/rejected": 0.3063426911830902, "step": 1400 }, { "epoch": 2.53, "learning_rate": 6.211029826345759e-09, "logits/chosen": -2.3375415802001953, "logits/rejected": -2.349270820617676, "logps/chosen": -1.8560173511505127, "logps/rejected": -6.346161842346191, "loss": 0.582, "rewards/accuracies": 1.0, "rewards/chosen": 0.7117663621902466, "rewards/margins": 0.2867532968521118, "rewards/rejected": 0.42501306533813477, "step": 1401 }, { "epoch": 2.54, "learning_rate": 6.163988307750068e-09, "logits/chosen": -2.324848175048828, "logits/rejected": -2.31081223487854, "logps/chosen": -4.875542163848877, "logps/rejected": -10.274526596069336, "loss": 0.9238, "rewards/accuracies": 0.0, "rewards/chosen": 0.6179593801498413, "rewards/margins": -0.3915823698043823, "rewards/rejected": 1.0095417499542236, "step": 1402 }, { "epoch": 2.54, "learning_rate": 6.117113905354959e-09, "logits/chosen": -2.4034833908081055, "logits/rejected": -2.3774495124816895, "logps/chosen": -15.314504623413086, "logps/rejected": 0.0, "loss": 0.5264, "rewards/accuracies": 1.0, "rewards/chosen": 0.8660215735435486, "rewards/margins": 0.8660215735435486, "rewards/rejected": 0.0, "step": 1403 }, { "epoch": 2.54, "learning_rate": 6.0704067978599114e-09, "logits/chosen": -2.2468979358673096, "logits/rejected": -2.2364108562469482, "logps/chosen": -1.9296854734420776, "logps/rejected": -4.678910732269287, "loss": 0.6029, "rewards/accuracies": 1.0, "rewards/chosen": 0.8462132811546326, "rewards/margins": 0.23465842008590698, "rewards/rejected": 0.6115548610687256, "step": 1404 }, { "epoch": 2.54, "learning_rate": 6.0238671633266015e-09, "logits/chosen": -2.381450653076172, "logits/rejected": -2.37331485748291, "logps/chosen": -2.968247413635254, "logps/rejected": -5.344969749450684, "loss": 0.5973, "rewards/accuracies": 1.0, "rewards/chosen": 0.9269253611564636, "rewards/margins": 0.14435595273971558, "rewards/rejected": 0.782569408416748, "step": 1405 }, { "epoch": 2.54, "learning_rate": 5.977495179178266e-09, "logits/chosen": -2.1609039306640625, "logits/rejected": -2.1615586280822754, "logps/chosen": -1.500473976135254, "logps/rejected": -3.0314383506774902, "loss": 0.5531, "rewards/accuracies": 1.0, "rewards/chosen": 0.7837349772453308, "rewards/margins": 0.3871210515499115, "rewards/rejected": 0.3966139256954193, "step": 1406 }, { "epoch": 2.54, "learning_rate": 5.931291022198992e-09, "logits/chosen": -2.276628017425537, "logits/rejected": -2.248070001602173, "logps/chosen": -3.4340689182281494, "logps/rejected": -3.2922728061676025, "loss": 0.738, "rewards/accuracies": 1.0, "rewards/chosen": 0.8665112853050232, "rewards/margins": 0.2044774293899536, "rewards/rejected": 0.6620338559150696, "step": 1407 }, { "epoch": 2.55, "learning_rate": 5.885254868533096e-09, "logits/chosen": -2.325566053390503, "logits/rejected": -2.333745002746582, "logps/chosen": -3.289716958999634, "logps/rejected": -3.0786712169647217, "loss": 0.6801, "rewards/accuracies": 1.0, "rewards/chosen": 0.7794634103775024, "rewards/margins": 0.33264878392219543, "rewards/rejected": 0.446814626455307, "step": 1408 }, { "epoch": 2.55, "learning_rate": 5.839386893684356e-09, "logits/chosen": -2.34338116645813, "logits/rejected": -2.347034454345703, "logps/chosen": -1.234941005706787, "logps/rejected": -3.5731499195098877, "loss": 0.6181, "rewards/accuracies": 1.0, "rewards/chosen": 0.8229999542236328, "rewards/margins": 0.17016440629959106, "rewards/rejected": 0.6528355479240417, "step": 1409 }, { "epoch": 2.55, "learning_rate": 5.7936872725154626e-09, "logits/chosen": -2.3328847885131836, "logits/rejected": -2.346701145172119, "logps/chosen": -15.193310737609863, "logps/rejected": -22.676841735839844, "loss": 0.5461, "rewards/accuracies": 1.0, "rewards/chosen": 1.0806390047073364, "rewards/margins": 0.5249153971672058, "rewards/rejected": 0.5557236075401306, "step": 1410 }, { "epoch": 2.55, "learning_rate": 5.748156179247238e-09, "logits/chosen": -2.272620439529419, "logits/rejected": -2.2746055126190186, "logps/chosen": -3.55539870262146, "logps/rejected": -4.663932800292969, "loss": 0.682, "rewards/accuracies": 1.0, "rewards/chosen": 0.9885585904121399, "rewards/margins": 0.5133179426193237, "rewards/rejected": 0.4752406179904938, "step": 1411 }, { "epoch": 2.55, "learning_rate": 5.702793787458071e-09, "logits/chosen": -2.3214499950408936, "logits/rejected": -2.3225932121276855, "logps/chosen": -4.017989635467529, "logps/rejected": -9.726454734802246, "loss": 0.7114, "rewards/accuracies": 0.0, "rewards/chosen": 0.7029650211334229, "rewards/margins": -0.09680026769638062, "rewards/rejected": 0.7997652888298035, "step": 1412 }, { "epoch": 2.56, "learning_rate": 5.657600270083179e-09, "logits/chosen": -2.3498146533966064, "logits/rejected": -2.345102071762085, "logps/chosen": -1.7518508434295654, "logps/rejected": -5.642270088195801, "loss": 0.5969, "rewards/accuracies": 1.0, "rewards/chosen": 0.7871186137199402, "rewards/margins": 0.3381795585155487, "rewards/rejected": 0.4489390552043915, "step": 1413 }, { "epoch": 2.56, "learning_rate": 5.612575799413988e-09, "logits/chosen": -2.357977867126465, "logits/rejected": -2.352372646331787, "logps/chosen": -8.161837577819824, "logps/rejected": -4.806396007537842, "loss": 0.5895, "rewards/accuracies": 1.0, "rewards/chosen": 1.0051137208938599, "rewards/margins": 0.44487255811691284, "rewards/rejected": 0.560241162776947, "step": 1414 }, { "epoch": 2.56, "learning_rate": 5.567720547097449e-09, "logits/chosen": -2.2806951999664307, "logits/rejected": -2.2907960414886475, "logps/chosen": -3.1834473609924316, "logps/rejected": -6.652350425720215, "loss": 0.5905, "rewards/accuracies": 1.0, "rewards/chosen": 0.6512389779090881, "rewards/margins": 0.35365062952041626, "rewards/rejected": 0.2975883483886719, "step": 1415 }, { "epoch": 2.56, "learning_rate": 5.523034684135431e-09, "logits/chosen": -2.167186975479126, "logits/rejected": -2.2219736576080322, "logps/chosen": -2.1971535682678223, "logps/rejected": -26.55652618408203, "loss": 0.5644, "rewards/accuracies": 1.0, "rewards/chosen": 0.7453343868255615, "rewards/margins": 0.39827457070350647, "rewards/rejected": 0.34705981612205505, "step": 1416 }, { "epoch": 2.56, "learning_rate": 5.478518380884034e-09, "logits/chosen": -2.29978346824646, "logits/rejected": -2.345736026763916, "logps/chosen": -1.5523293018341064, "logps/rejected": -26.846370697021484, "loss": 0.478, "rewards/accuracies": 1.0, "rewards/chosen": 0.6394910216331482, "rewards/margins": 1.104584813117981, "rewards/rejected": -0.46509382128715515, "step": 1417 }, { "epoch": 2.56, "learning_rate": 5.434171807052934e-09, "logits/chosen": -2.223708391189575, "logits/rejected": -2.2320051193237305, "logps/chosen": -1.9255943298339844, "logps/rejected": -3.4930543899536133, "loss": 0.5939, "rewards/accuracies": 0.0, "rewards/chosen": 0.5281627774238586, "rewards/margins": -0.21229946613311768, "rewards/rejected": 0.7404622435569763, "step": 1418 }, { "epoch": 2.57, "learning_rate": 5.389995131704738e-09, "logits/chosen": -2.2191383838653564, "logits/rejected": -2.216630220413208, "logps/chosen": -0.8303394913673401, "logps/rejected": -4.067147731781006, "loss": 0.5905, "rewards/accuracies": 1.0, "rewards/chosen": 0.4600386619567871, "rewards/margins": 0.06406530737876892, "rewards/rejected": 0.3959733545780182, "step": 1419 }, { "epoch": 2.57, "learning_rate": 5.345988523254358e-09, "logits/chosen": -2.334089517593384, "logits/rejected": -2.3262314796447754, "logps/chosen": -2.2022247314453125, "logps/rejected": -5.100882053375244, "loss": 0.6197, "rewards/accuracies": 1.0, "rewards/chosen": 0.7555332183837891, "rewards/margins": 0.3402242958545685, "rewards/rejected": 0.4153089225292206, "step": 1420 }, { "epoch": 2.57, "learning_rate": 5.30215214946837e-09, "logits/chosen": -2.232499599456787, "logits/rejected": -2.232166290283203, "logps/chosen": -1.533359408378601, "logps/rejected": -0.9424593448638916, "loss": 0.5944, "rewards/accuracies": 1.0, "rewards/chosen": 0.6039294600486755, "rewards/margins": 0.17551273107528687, "rewards/rejected": 0.42841672897338867, "step": 1421 }, { "epoch": 2.57, "learning_rate": 5.258486177464366e-09, "logits/chosen": -2.1631743907928467, "logits/rejected": -2.2302451133728027, "logps/chosen": -2.3587584495544434, "logps/rejected": -23.164623260498047, "loss": 0.6952, "rewards/accuracies": 1.0, "rewards/chosen": 0.5428017973899841, "rewards/margins": 0.009915649890899658, "rewards/rejected": 0.5328861474990845, "step": 1422 }, { "epoch": 2.57, "learning_rate": 5.214990773710293e-09, "logits/chosen": -2.401287794113159, "logits/rejected": -2.4037351608276367, "logps/chosen": -2.6214468479156494, "logps/rejected": -4.165665149688721, "loss": 0.6115, "rewards/accuracies": 1.0, "rewards/chosen": 0.8432270884513855, "rewards/margins": 0.12144041061401367, "rewards/rejected": 0.7217866778373718, "step": 1423 }, { "epoch": 2.58, "learning_rate": 5.171666104023836e-09, "logits/chosen": -2.2171523571014404, "logits/rejected": -2.21156907081604, "logps/chosen": -3.68410587310791, "logps/rejected": -12.2550048828125, "loss": 0.5792, "rewards/accuracies": 1.0, "rewards/chosen": 0.7842289209365845, "rewards/margins": 0.361110121011734, "rewards/rejected": 0.42311879992485046, "step": 1424 }, { "epoch": 2.58, "learning_rate": 5.1285123335718324e-09, "logits/chosen": -2.300485372543335, "logits/rejected": -2.2684504985809326, "logps/chosen": -5.3744401931762695, "logps/rejected": 0.0, "loss": 0.4785, "rewards/accuracies": 1.0, "rewards/chosen": 1.0474214553833008, "rewards/margins": 1.0474214553833008, "rewards/rejected": 0.0, "step": 1425 }, { "epoch": 2.58, "learning_rate": 5.085529626869556e-09, "logits/chosen": -2.3300230503082275, "logits/rejected": -2.322310209274292, "logps/chosen": -3.1354570388793945, "logps/rejected": -2.8712210655212402, "loss": 0.6098, "rewards/accuracies": 0.0, "rewards/chosen": 0.5549231767654419, "rewards/margins": -0.05531269311904907, "rewards/rejected": 0.610235869884491, "step": 1426 }, { "epoch": 2.58, "learning_rate": 5.04271814778014e-09, "logits/chosen": -2.388052225112915, "logits/rejected": -2.39744234085083, "logps/chosen": -1.5522611141204834, "logps/rejected": -10.249876022338867, "loss": 0.6467, "rewards/accuracies": 1.0, "rewards/chosen": 0.8547404408454895, "rewards/margins": 0.20516729354858398, "rewards/rejected": 0.6495731472969055, "step": 1427 }, { "epoch": 2.58, "learning_rate": 5.000078059513979e-09, "logits/chosen": -2.3116161823272705, "logits/rejected": -2.3115365505218506, "logps/chosen": -2.1880085468292236, "logps/rejected": -0.6787824034690857, "loss": 0.6223, "rewards/accuracies": 1.0, "rewards/chosen": 0.4657101631164551, "rewards/margins": 0.0624488890171051, "rewards/rejected": 0.40326127409935, "step": 1428 }, { "epoch": 2.58, "learning_rate": 4.957609524628026e-09, "logits/chosen": -2.2893693447113037, "logits/rejected": -2.285987138748169, "logps/chosen": -1.2612619400024414, "logps/rejected": -3.9148612022399902, "loss": 0.7391, "rewards/accuracies": 1.0, "rewards/chosen": 0.7272526621818542, "rewards/margins": 0.09411048889160156, "rewards/rejected": 0.6331421732902527, "step": 1429 }, { "epoch": 2.59, "learning_rate": 4.9153127050252654e-09, "logits/chosen": -2.4550318717956543, "logits/rejected": -2.4165244102478027, "logps/chosen": -24.01563262939453, "logps/rejected": -3.910933494567871, "loss": 0.6734, "rewards/accuracies": 0.0, "rewards/chosen": 0.23713131248950958, "rewards/margins": -0.2347385436296463, "rewards/rejected": 0.4718698561191559, "step": 1430 }, { "epoch": 2.59, "learning_rate": 4.8731877619540175e-09, "logits/chosen": -2.309148073196411, "logits/rejected": -2.314624309539795, "logps/chosen": -0.9868643283843994, "logps/rejected": -1.4320579767227173, "loss": 0.8422, "rewards/accuracies": 1.0, "rewards/chosen": 0.4984380900859833, "rewards/margins": 0.03945803642272949, "rewards/rejected": 0.4589800536632538, "step": 1431 }, { "epoch": 2.59, "learning_rate": 4.831234856007371e-09, "logits/chosen": -2.3915438652038574, "logits/rejected": -2.3871278762817383, "logps/chosen": -6.818820953369141, "logps/rejected": -9.386347770690918, "loss": 0.7328, "rewards/accuracies": 1.0, "rewards/chosen": 0.9453754425048828, "rewards/margins": 0.39174163341522217, "rewards/rejected": 0.5536338090896606, "step": 1432 }, { "epoch": 2.59, "learning_rate": 4.789454147122551e-09, "logits/chosen": -2.287536859512329, "logits/rejected": -2.3443362712860107, "logps/chosen": -7.071581840515137, "logps/rejected": -36.522674560546875, "loss": 0.5534, "rewards/accuracies": 1.0, "rewards/chosen": 1.0785702466964722, "rewards/margins": 0.3170529007911682, "rewards/rejected": 0.761517345905304, "step": 1433 }, { "epoch": 2.59, "learning_rate": 4.7478457945803244e-09, "logits/chosen": -2.3080520629882812, "logits/rejected": -2.3090217113494873, "logps/chosen": -1.1462411880493164, "logps/rejected": -1.6001229286193848, "loss": 0.6916, "rewards/accuracies": 1.0, "rewards/chosen": 0.7625682950019836, "rewards/margins": 0.252008318901062, "rewards/rejected": 0.5105599761009216, "step": 1434 }, { "epoch": 2.59, "learning_rate": 4.706409957004381e-09, "logits/chosen": -2.3110525608062744, "logits/rejected": -2.3332386016845703, "logps/chosen": -1.490909218788147, "logps/rejected": -16.70949935913086, "loss": 0.7612, "rewards/accuracies": 0.0, "rewards/chosen": 0.6408243775367737, "rewards/margins": -0.15186095237731934, "rewards/rejected": 0.792685329914093, "step": 1435 }, { "epoch": 2.6, "learning_rate": 4.665146792360724e-09, "logits/chosen": -2.305572986602783, "logits/rejected": -2.311762809753418, "logps/chosen": -1.1423403024673462, "logps/rejected": -2.3745810985565186, "loss": 0.6447, "rewards/accuracies": 1.0, "rewards/chosen": 0.7448868155479431, "rewards/margins": 0.23216718435287476, "rewards/rejected": 0.5127196311950684, "step": 1436 }, { "epoch": 2.6, "learning_rate": 4.624056457957076e-09, "logits/chosen": -2.3082611560821533, "logits/rejected": -2.3995890617370605, "logps/chosen": -1.698121190071106, "logps/rejected": -28.246461868286133, "loss": 0.7515, "rewards/accuracies": 1.0, "rewards/chosen": 0.7054395079612732, "rewards/margins": 0.19791555404663086, "rewards/rejected": 0.5075239539146423, "step": 1437 }, { "epoch": 2.6, "learning_rate": 4.583139110442291e-09, "logits/chosen": -2.316300630569458, "logits/rejected": -2.2859556674957275, "logps/chosen": -1.7737778425216675, "logps/rejected": 0.0, "loss": 0.4735, "rewards/accuracies": 1.0, "rewards/chosen": 0.711563229560852, "rewards/margins": 0.711563229560852, "rewards/rejected": 0.0, "step": 1438 }, { "epoch": 2.6, "learning_rate": 4.54239490580573e-09, "logits/chosen": -2.2761192321777344, "logits/rejected": -2.264592170715332, "logps/chosen": -3.8547346591949463, "logps/rejected": -9.438475608825684, "loss": 0.6148, "rewards/accuracies": 1.0, "rewards/chosen": 0.7648565173149109, "rewards/margins": 0.06930243968963623, "rewards/rejected": 0.6955540776252747, "step": 1439 }, { "epoch": 2.6, "learning_rate": 4.5018239993766695e-09, "logits/chosen": -2.358623504638672, "logits/rejected": -2.364335298538208, "logps/chosen": -3.602846622467041, "logps/rejected": -12.104948997497559, "loss": 0.4409, "rewards/accuracies": 1.0, "rewards/chosen": 0.9628677368164062, "rewards/margins": 0.5900852084159851, "rewards/rejected": 0.37278252840042114, "step": 1440 }, { "epoch": 2.61, "learning_rate": 4.461426545823766e-09, "logits/chosen": -2.396239995956421, "logits/rejected": -2.4738945960998535, "logps/chosen": -1.695919394493103, "logps/rejected": -8.712794303894043, "loss": 0.6054, "rewards/accuracies": 1.0, "rewards/chosen": 0.7507003545761108, "rewards/margins": 0.16720646619796753, "rewards/rejected": 0.5834938883781433, "step": 1441 }, { "epoch": 2.61, "learning_rate": 4.421202699154364e-09, "logits/chosen": -2.1274776458740234, "logits/rejected": -2.1346943378448486, "logps/chosen": -2.4848523139953613, "logps/rejected": -3.712862253189087, "loss": 0.6832, "rewards/accuracies": 1.0, "rewards/chosen": 0.43554797768592834, "rewards/margins": 0.0563834011554718, "rewards/rejected": 0.37916457653045654, "step": 1442 }, { "epoch": 2.61, "learning_rate": 4.381152612714012e-09, "logits/chosen": -2.3364932537078857, "logits/rejected": -2.3244495391845703, "logps/chosen": -5.902667045593262, "logps/rejected": -2.7968785762786865, "loss": 0.6485, "rewards/accuracies": 1.0, "rewards/chosen": 0.8739457130432129, "rewards/margins": 0.06093984842300415, "rewards/rejected": 0.8130058646202087, "step": 1443 }, { "epoch": 2.61, "learning_rate": 4.341276439185809e-09, "logits/chosen": -2.434633255004883, "logits/rejected": -2.4597675800323486, "logps/chosen": -1.5169192552566528, "logps/rejected": -11.628116607666016, "loss": 0.75, "rewards/accuracies": 0.0, "rewards/chosen": 0.6750157475471497, "rewards/margins": -0.2156466245651245, "rewards/rejected": 0.8906623721122742, "step": 1444 }, { "epoch": 2.61, "learning_rate": 4.301574330589836e-09, "logits/chosen": -2.3495283126831055, "logits/rejected": -2.3578739166259766, "logps/chosen": -2.5245611667633057, "logps/rejected": -5.434886932373047, "loss": 0.4361, "rewards/accuracies": 1.0, "rewards/chosen": 0.8748682141304016, "rewards/margins": 0.24473607540130615, "rewards/rejected": 0.6301321387290955, "step": 1445 }, { "epoch": 2.61, "learning_rate": 4.2620464382826015e-09, "logits/chosen": -2.2900233268737793, "logits/rejected": -2.365750312805176, "logps/chosen": -1.9236632585525513, "logps/rejected": -23.797014236450195, "loss": 0.5574, "rewards/accuracies": 0.0, "rewards/chosen": 0.5765078663825989, "rewards/margins": -0.266076922416687, "rewards/rejected": 0.8425847887992859, "step": 1446 }, { "epoch": 2.62, "learning_rate": 4.222692912956466e-09, "logits/chosen": -2.3212156295776367, "logits/rejected": -2.3158626556396484, "logps/chosen": -2.5648598670959473, "logps/rejected": -8.596368789672852, "loss": 0.6472, "rewards/accuracies": 0.0, "rewards/chosen": 0.7419024109840393, "rewards/margins": -0.5723170638084412, "rewards/rejected": 1.3142194747924805, "step": 1447 }, { "epoch": 2.62, "learning_rate": 4.1835139046390185e-09, "logits/chosen": -2.43082594871521, "logits/rejected": -2.480292320251465, "logps/chosen": -6.223731994628906, "logps/rejected": -17.812572479248047, "loss": 0.6845, "rewards/accuracies": 0.0, "rewards/chosen": 0.4107431471347809, "rewards/margins": -0.5621544122695923, "rewards/rejected": 0.9728975296020508, "step": 1448 }, { "epoch": 2.62, "learning_rate": 4.144509562692544e-09, "logits/chosen": -2.3398489952087402, "logits/rejected": -2.330829620361328, "logps/chosen": -3.5644772052764893, "logps/rejected": -6.917041778564453, "loss": 0.7709, "rewards/accuracies": 0.0, "rewards/chosen": 0.7617980241775513, "rewards/margins": -0.23846042156219482, "rewards/rejected": 1.000258445739746, "step": 1449 }, { "epoch": 2.62, "learning_rate": 4.105680035813436e-09, "logits/chosen": -2.212148427963257, "logits/rejected": -2.2082040309906006, "logps/chosen": -3.33453369140625, "logps/rejected": -1.200752854347229, "loss": 0.7794, "rewards/accuracies": 0.0, "rewards/chosen": 0.6600889563560486, "rewards/margins": -0.022562623023986816, "rewards/rejected": 0.6826515793800354, "step": 1450 }, { "epoch": 2.62, "learning_rate": 4.067025472031677e-09, "logits/chosen": -2.2112715244293213, "logits/rejected": -2.2240562438964844, "logps/chosen": -3.2498950958251953, "logps/rejected": -15.479267120361328, "loss": 0.601, "rewards/accuracies": 1.0, "rewards/chosen": 0.6870523691177368, "rewards/margins": 0.367605596780777, "rewards/rejected": 0.31944677233695984, "step": 1451 }, { "epoch": 2.63, "learning_rate": 4.028546018710199e-09, "logits/chosen": -2.345184087753296, "logits/rejected": -2.342506170272827, "logps/chosen": -1.6956162452697754, "logps/rejected": -3.6930956840515137, "loss": 0.7651, "rewards/accuracies": 0.0, "rewards/chosen": 0.664962649345398, "rewards/margins": -0.2696717381477356, "rewards/rejected": 0.9346343874931335, "step": 1452 }, { "epoch": 2.63, "learning_rate": 3.990241822544382e-09, "logits/chosen": -2.376222848892212, "logits/rejected": -2.376382827758789, "logps/chosen": -11.328076362609863, "logps/rejected": -1.5193564891815186, "loss": 0.6798, "rewards/accuracies": 0.0, "rewards/chosen": 0.3742598593235016, "rewards/margins": -0.08501949906349182, "rewards/rejected": 0.4592793583869934, "step": 1453 }, { "epoch": 2.63, "learning_rate": 3.95211302956146e-09, "logits/chosen": -2.415208101272583, "logits/rejected": -2.4479854106903076, "logps/chosen": -1.1440259218215942, "logps/rejected": -21.911327362060547, "loss": 0.4541, "rewards/accuracies": 1.0, "rewards/chosen": 0.8486658930778503, "rewards/margins": 0.773401141166687, "rewards/rejected": 0.07526474446058273, "step": 1454 }, { "epoch": 2.63, "learning_rate": 3.91415978511998e-09, "logits/chosen": -2.4049837589263916, "logits/rejected": -2.401914358139038, "logps/chosen": -2.111666440963745, "logps/rejected": -6.447475910186768, "loss": 0.5423, "rewards/accuracies": 1.0, "rewards/chosen": 0.8768278956413269, "rewards/margins": 0.3765532970428467, "rewards/rejected": 0.5002745985984802, "step": 1455 }, { "epoch": 2.63, "learning_rate": 3.876382233909248e-09, "logits/chosen": -2.278550624847412, "logits/rejected": -2.279322624206543, "logps/chosen": -2.8827762603759766, "logps/rejected": -1.9801064729690552, "loss": 0.672, "rewards/accuracies": 1.0, "rewards/chosen": 0.7084670066833496, "rewards/margins": 0.08985620737075806, "rewards/rejected": 0.6186107993125916, "step": 1456 }, { "epoch": 2.63, "learning_rate": 3.838780519948776e-09, "logits/chosen": -2.2302956581115723, "logits/rejected": -2.2071073055267334, "logps/chosen": -1.6649699211120605, "logps/rejected": 0.0, "loss": 0.4335, "rewards/accuracies": 1.0, "rewards/chosen": 0.6877478361129761, "rewards/margins": 0.6877478361129761, "rewards/rejected": 0.0, "step": 1457 }, { "epoch": 2.64, "learning_rate": 3.801354786587712e-09, "logits/chosen": -2.2315008640289307, "logits/rejected": -2.234318256378174, "logps/chosen": -2.145198345184326, "logps/rejected": -9.995620727539062, "loss": 0.9063, "rewards/accuracies": 1.0, "rewards/chosen": 0.6745062470436096, "rewards/margins": 0.01595062017440796, "rewards/rejected": 0.6585556268692017, "step": 1458 }, { "epoch": 2.64, "learning_rate": 3.7641051765043395e-09, "logits/chosen": -2.4147157669067383, "logits/rejected": -2.415548324584961, "logps/chosen": -1.4536722898483276, "logps/rejected": -2.5106184482574463, "loss": 0.6985, "rewards/accuracies": 1.0, "rewards/chosen": 0.7432599067687988, "rewards/margins": 0.14239531755447388, "rewards/rejected": 0.600864589214325, "step": 1459 }, { "epoch": 2.64, "learning_rate": 3.727031831705496e-09, "logits/chosen": -2.366234064102173, "logits/rejected": -2.3647913932800293, "logps/chosen": -0.9005768895149231, "logps/rejected": -12.493736267089844, "loss": 0.6858, "rewards/accuracies": 0.0, "rewards/chosen": 0.6352993249893188, "rewards/margins": -0.09709888696670532, "rewards/rejected": 0.7323982119560242, "step": 1460 }, { "epoch": 2.64, "learning_rate": 3.690134893526037e-09, "logits/chosen": -2.3586509227752686, "logits/rejected": -2.339271306991577, "logps/chosen": -4.414494037628174, "logps/rejected": -2.8612935543060303, "loss": 0.5833, "rewards/accuracies": 1.0, "rewards/chosen": 0.836696445941925, "rewards/margins": 0.27795326709747314, "rewards/rejected": 0.5587431788444519, "step": 1461 }, { "epoch": 2.64, "learning_rate": 3.6534145026283104e-09, "logits/chosen": -2.3474862575531006, "logits/rejected": -2.380354881286621, "logps/chosen": -5.09328556060791, "logps/rejected": -8.750419616699219, "loss": 0.4932, "rewards/accuracies": 1.0, "rewards/chosen": 0.8877240419387817, "rewards/margins": 0.16066831350326538, "rewards/rejected": 0.7270557284355164, "step": 1462 }, { "epoch": 2.65, "learning_rate": 3.6168707990015868e-09, "logits/chosen": -2.159242868423462, "logits/rejected": -2.188103437423706, "logps/chosen": -2.3987178802490234, "logps/rejected": -2.7755651473999023, "loss": 0.6465, "rewards/accuracies": 1.0, "rewards/chosen": 0.8361589312553406, "rewards/margins": 0.0553470253944397, "rewards/rejected": 0.7808119058609009, "step": 1463 }, { "epoch": 2.65, "learning_rate": 3.5805039219615975e-09, "logits/chosen": -2.3933932781219482, "logits/rejected": -2.3942313194274902, "logps/chosen": -2.496242046356201, "logps/rejected": -2.3950612545013428, "loss": 0.7313, "rewards/accuracies": 0.0, "rewards/chosen": 0.4467069208621979, "rewards/margins": -0.15028271079063416, "rewards/rejected": 0.596989631652832, "step": 1464 }, { "epoch": 2.65, "learning_rate": 3.5443140101499124e-09, "logits/chosen": -2.25778865814209, "logits/rejected": -2.3961570262908936, "logps/chosen": -1.5242705345153809, "logps/rejected": -32.68446731567383, "loss": 0.6661, "rewards/accuracies": 1.0, "rewards/chosen": 0.8117426037788391, "rewards/margins": 0.07900983095169067, "rewards/rejected": 0.7327327728271484, "step": 1465 }, { "epoch": 2.65, "learning_rate": 3.5083012015334977e-09, "logits/chosen": -2.3158063888549805, "logits/rejected": -2.3197243213653564, "logps/chosen": -6.731372356414795, "logps/rejected": -9.120829582214355, "loss": 0.7339, "rewards/accuracies": 0.0, "rewards/chosen": 0.5785087943077087, "rewards/margins": -0.2738892436027527, "rewards/rejected": 0.8523980379104614, "step": 1466 }, { "epoch": 2.65, "learning_rate": 3.4724656334041037e-09, "logits/chosen": -2.4517571926116943, "logits/rejected": -2.4512531757354736, "logps/chosen": -0.9929729700088501, "logps/rejected": -1.9769386053085327, "loss": 0.5833, "rewards/accuracies": 1.0, "rewards/chosen": 0.7917308211326599, "rewards/margins": 0.3696030378341675, "rewards/rejected": 0.42212778329849243, "step": 1467 }, { "epoch": 2.65, "learning_rate": 3.4368074423778146e-09, "logits/chosen": -2.2247493267059326, "logits/rejected": -2.2843899726867676, "logps/chosen": -1.5358319282531738, "logps/rejected": -22.01285171508789, "loss": 0.8668, "rewards/accuracies": 1.0, "rewards/chosen": 0.7896762490272522, "rewards/margins": 0.2318463921546936, "rewards/rejected": 0.5578298568725586, "step": 1468 }, { "epoch": 2.66, "learning_rate": 3.401326764394502e-09, "logits/chosen": -2.365988254547119, "logits/rejected": -2.369274854660034, "logps/chosen": -1.9270155429840088, "logps/rejected": -1.0030403137207031, "loss": 0.6833, "rewards/accuracies": 1.0, "rewards/chosen": 0.7806588411331177, "rewards/margins": 0.07278120517730713, "rewards/rejected": 0.7078776359558105, "step": 1469 }, { "epoch": 2.66, "learning_rate": 3.366023734717288e-09, "logits/chosen": -2.3612453937530518, "logits/rejected": -2.2731266021728516, "logps/chosen": -11.75148868560791, "logps/rejected": -1.8037675619125366, "loss": 0.6058, "rewards/accuracies": 0.0, "rewards/chosen": 0.49896764755249023, "rewards/margins": -0.2782829999923706, "rewards/rejected": 0.7772506475448608, "step": 1470 }, { "epoch": 2.66, "learning_rate": 3.3308984879320433e-09, "logits/chosen": -2.4628541469573975, "logits/rejected": -2.4797122478485107, "logps/chosen": -29.084125518798828, "logps/rejected": -39.618896484375, "loss": 0.413, "rewards/accuracies": 1.0, "rewards/chosen": 0.3369300961494446, "rewards/margins": 0.3889877498149872, "rewards/rejected": -0.052057649940252304, "step": 1471 }, { "epoch": 2.66, "learning_rate": 3.295951157946897e-09, "logits/chosen": -2.4456112384796143, "logits/rejected": -2.4391181468963623, "logps/chosen": -3.89275860786438, "logps/rejected": -2.02846097946167, "loss": 0.5126, "rewards/accuracies": 1.0, "rewards/chosen": 0.8472485542297363, "rewards/margins": 0.2907053232192993, "rewards/rejected": 0.556543231010437, "step": 1472 }, { "epoch": 2.66, "learning_rate": 3.261181877991692e-09, "logits/chosen": -2.3482213020324707, "logits/rejected": -2.3465018272399902, "logps/chosen": -0.9090534448623657, "logps/rejected": -9.521276473999023, "loss": 0.8002, "rewards/accuracies": 0.0, "rewards/chosen": 0.645753800868988, "rewards/margins": -0.19138211011886597, "rewards/rejected": 0.837135910987854, "step": 1473 }, { "epoch": 2.67, "learning_rate": 3.226590780617494e-09, "logits/chosen": -2.2549147605895996, "logits/rejected": -2.2515368461608887, "logps/chosen": -0.7580142021179199, "logps/rejected": -6.905200481414795, "loss": 0.5757, "rewards/accuracies": 1.0, "rewards/chosen": 0.6680303812026978, "rewards/margins": 0.264095276594162, "rewards/rejected": 0.40393510460853577, "step": 1474 }, { "epoch": 2.67, "learning_rate": 3.1921779976960772e-09, "logits/chosen": -2.344700574874878, "logits/rejected": -2.347607135772705, "logps/chosen": -6.592746734619141, "logps/rejected": -9.326456069946289, "loss": 0.6396, "rewards/accuracies": 0.0, "rewards/chosen": 0.6050286293029785, "rewards/margins": -0.27402544021606445, "rewards/rejected": 0.879054069519043, "step": 1475 }, { "epoch": 2.67, "learning_rate": 3.157943660419421e-09, "logits/chosen": -2.3498573303222656, "logits/rejected": -2.337608814239502, "logps/chosen": -3.256736993789673, "logps/rejected": -8.446106910705566, "loss": 0.5893, "rewards/accuracies": 1.0, "rewards/chosen": 0.5123756527900696, "rewards/margins": 0.10577288269996643, "rewards/rejected": 0.40660277009010315, "step": 1476 }, { "epoch": 2.67, "learning_rate": 3.1238878992992322e-09, "logits/chosen": -2.3594300746917725, "logits/rejected": -2.3596560955047607, "logps/chosen": -1.603304386138916, "logps/rejected": -3.7064177989959717, "loss": 0.6499, "rewards/accuracies": 1.0, "rewards/chosen": 0.7661013007164001, "rewards/margins": 0.07752734422683716, "rewards/rejected": 0.688573956489563, "step": 1477 }, { "epoch": 2.67, "learning_rate": 3.0900108441664295e-09, "logits/chosen": -2.171084403991699, "logits/rejected": -2.169689178466797, "logps/chosen": -6.285547256469727, "logps/rejected": -1.5995779037475586, "loss": 0.7373, "rewards/accuracies": 0.0, "rewards/chosen": 0.7173050045967102, "rewards/margins": -0.05672729015350342, "rewards/rejected": 0.7740322947502136, "step": 1478 }, { "epoch": 2.67, "learning_rate": 3.056312624170643e-09, "logits/chosen": -2.3104348182678223, "logits/rejected": -2.291019916534424, "logps/chosen": -7.372391223907471, "logps/rejected": 0.0, "loss": 0.5403, "rewards/accuracies": 1.0, "rewards/chosen": 0.49327388405799866, "rewards/margins": 0.49327388405799866, "rewards/rejected": 0.0, "step": 1479 }, { "epoch": 2.68, "learning_rate": 3.0227933677797136e-09, "logits/chosen": -2.254891872406006, "logits/rejected": -2.2936980724334717, "logps/chosen": 0.0, "logps/rejected": -1.9207813739776611, "loss": 0.8944, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": -0.8944123387336731, "rewards/rejected": 0.8944123387336731, "step": 1480 }, { "epoch": 2.68, "learning_rate": 2.989453202779252e-09, "logits/chosen": -2.3155267238616943, "logits/rejected": -2.313631772994995, "logps/chosen": -2.6157078742980957, "logps/rejected": -12.740853309631348, "loss": 0.4795, "rewards/accuracies": 1.0, "rewards/chosen": 0.92141193151474, "rewards/margins": 0.6589955687522888, "rewards/rejected": 0.26241636276245117, "step": 1481 }, { "epoch": 2.68, "learning_rate": 2.9562922562720972e-09, "logits/chosen": -2.2295725345611572, "logits/rejected": -2.237910270690918, "logps/chosen": -3.4762845039367676, "logps/rejected": -11.813432693481445, "loss": 0.4468, "rewards/accuracies": 1.0, "rewards/chosen": 0.8634018301963806, "rewards/margins": 0.6109064817428589, "rewards/rejected": 0.2524953782558441, "step": 1482 }, { "epoch": 2.68, "learning_rate": 2.9233106546778463e-09, "logits/chosen": -2.2445337772369385, "logits/rejected": -2.3283042907714844, "logps/chosen": -2.19173002243042, "logps/rejected": -24.04804039001465, "loss": 0.7175, "rewards/accuracies": 1.0, "rewards/chosen": 0.5412360429763794, "rewards/margins": 0.12363636493682861, "rewards/rejected": 0.4175996780395508, "step": 1483 }, { "epoch": 2.68, "learning_rate": 2.89050852373241e-09, "logits/chosen": -2.478250741958618, "logits/rejected": -2.4709157943725586, "logps/chosen": -20.805450439453125, "logps/rejected": -3.216081142425537, "loss": 0.4969, "rewards/accuracies": 1.0, "rewards/chosen": 0.7577348947525024, "rewards/margins": 0.19279617071151733, "rewards/rejected": 0.5649387240409851, "step": 1484 }, { "epoch": 2.69, "learning_rate": 2.8578859884874573e-09, "logits/chosen": -2.2875418663024902, "logits/rejected": -2.28994083404541, "logps/chosen": -2.378243923187256, "logps/rejected": -1.2740678787231445, "loss": 0.792, "rewards/accuracies": 1.0, "rewards/chosen": 0.7928637266159058, "rewards/margins": 0.13194310665130615, "rewards/rejected": 0.6609206199645996, "step": 1485 }, { "epoch": 2.69, "learning_rate": 2.8254431733100327e-09, "logits/chosen": -2.457864284515381, "logits/rejected": -2.397024631500244, "logps/chosen": -22.894317626953125, "logps/rejected": -3.659651517868042, "loss": 0.6791, "rewards/accuracies": 1.0, "rewards/chosen": 0.899989128112793, "rewards/margins": 0.15435093641281128, "rewards/rejected": 0.7456381916999817, "step": 1486 }, { "epoch": 2.69, "learning_rate": 2.793180201881995e-09, "logits/chosen": -2.3212339878082275, "logits/rejected": -2.329111337661743, "logps/chosen": -4.810620307922363, "logps/rejected": -2.6940646171569824, "loss": 0.7672, "rewards/accuracies": 0.0, "rewards/chosen": 0.4681920111179352, "rewards/margins": -0.17410579323768616, "rewards/rejected": 0.6422978043556213, "step": 1487 }, { "epoch": 2.69, "learning_rate": 2.761097197199597e-09, "logits/chosen": -2.399461269378662, "logits/rejected": -2.4012534618377686, "logps/chosen": -4.022261142730713, "logps/rejected": -3.403430700302124, "loss": 0.5084, "rewards/accuracies": 1.0, "rewards/chosen": 0.9572257995605469, "rewards/margins": 0.37433546781539917, "rewards/rejected": 0.5828903317451477, "step": 1488 }, { "epoch": 2.69, "learning_rate": 2.7291942815729995e-09, "logits/chosen": -2.2895398139953613, "logits/rejected": -2.4186313152313232, "logps/chosen": -1.733419418334961, "logps/rejected": -31.71073341369629, "loss": 0.8041, "rewards/accuracies": 1.0, "rewards/chosen": 0.6126753091812134, "rewards/margins": 0.06674766540527344, "rewards/rejected": 0.5459276437759399, "step": 1489 }, { "epoch": 2.69, "learning_rate": 2.6974715766258205e-09, "logits/chosen": -2.2627718448638916, "logits/rejected": -2.2630255222320557, "logps/chosen": -2.3498570919036865, "logps/rejected": -7.795444965362549, "loss": 0.5761, "rewards/accuracies": 0.0, "rewards/chosen": 0.8479911684989929, "rewards/margins": -0.03571361303329468, "rewards/rejected": 0.8837047815322876, "step": 1490 }, { "epoch": 2.7, "learning_rate": 2.6659292032946644e-09, "logits/chosen": -2.3433902263641357, "logits/rejected": -2.346463203430176, "logps/chosen": -3.4726462364196777, "logps/rejected": -8.474344253540039, "loss": 0.6092, "rewards/accuracies": 0.0, "rewards/chosen": 0.8457496762275696, "rewards/margins": -0.1371288299560547, "rewards/rejected": 0.9828785061836243, "step": 1491 }, { "epoch": 2.7, "learning_rate": 2.6345672818286314e-09, "logits/chosen": -2.338534355163574, "logits/rejected": -2.2672901153564453, "logps/chosen": -37.46992874145508, "logps/rejected": -2.999605655670166, "loss": 0.5868, "rewards/accuracies": 1.0, "rewards/chosen": 0.9019080996513367, "rewards/margins": 0.10068720579147339, "rewards/rejected": 0.8012208938598633, "step": 1492 }, { "epoch": 2.7, "learning_rate": 2.6033859317888983e-09, "logits/chosen": -2.2793519496917725, "logits/rejected": -2.2891900539398193, "logps/chosen": -2.4669930934906006, "logps/rejected": -2.0827550888061523, "loss": 0.6831, "rewards/accuracies": 1.0, "rewards/chosen": 0.7946307063102722, "rewards/margins": 0.11829853057861328, "rewards/rejected": 0.6763321757316589, "step": 1493 }, { "epoch": 2.7, "learning_rate": 2.5723852720482554e-09, "logits/chosen": -2.297877073287964, "logits/rejected": -2.292080879211426, "logps/chosen": -1.5229568481445312, "logps/rejected": -1.8492228984832764, "loss": 0.6941, "rewards/accuracies": 0.0, "rewards/chosen": 0.49139222502708435, "rewards/margins": -0.10476192831993103, "rewards/rejected": 0.5961541533470154, "step": 1494 }, { "epoch": 2.7, "learning_rate": 2.541565420790642e-09, "logits/chosen": -2.2169597148895264, "logits/rejected": -2.22688889503479, "logps/chosen": -2.432906150817871, "logps/rejected": -6.070557594299316, "loss": 0.8009, "rewards/accuracies": 0.0, "rewards/chosen": 0.6160475611686707, "rewards/margins": -0.48089665174484253, "rewards/rejected": 1.0969442129135132, "step": 1495 }, { "epoch": 2.71, "learning_rate": 2.5109264955106847e-09, "logits/chosen": -2.295961380004883, "logits/rejected": -2.3026812076568604, "logps/chosen": -2.490257501602173, "logps/rejected": -9.062827110290527, "loss": 0.7406, "rewards/accuracies": 0.0, "rewards/chosen": 0.7772205471992493, "rewards/margins": -0.04877364635467529, "rewards/rejected": 0.8259941935539246, "step": 1496 }, { "epoch": 2.71, "learning_rate": 2.480468613013298e-09, "logits/chosen": -2.3322622776031494, "logits/rejected": -2.5227162837982178, "logps/chosen": -6.222225189208984, "logps/rejected": -39.40650939941406, "loss": 0.6273, "rewards/accuracies": 0.0, "rewards/chosen": 0.1903902143239975, "rewards/margins": -0.19062136113643646, "rewards/rejected": 0.38101157546043396, "step": 1497 }, { "epoch": 2.71, "learning_rate": 2.450191889413172e-09, "logits/chosen": -2.2247469425201416, "logits/rejected": -2.230009078979492, "logps/chosen": -2.3781166076660156, "logps/rejected": -9.393165588378906, "loss": 0.7771, "rewards/accuracies": 0.0, "rewards/chosen": 0.5509247779846191, "rewards/margins": -0.3117758631706238, "rewards/rejected": 0.8627006411552429, "step": 1498 }, { "epoch": 2.71, "learning_rate": 2.4200964401343947e-09, "logits/chosen": -2.3015801906585693, "logits/rejected": -2.311758518218994, "logps/chosen": -5.0179948806762695, "logps/rejected": -3.645019054412842, "loss": 0.6394, "rewards/accuracies": 0.0, "rewards/chosen": 0.6910733580589294, "rewards/margins": -0.03700476884841919, "rewards/rejected": 0.7280781269073486, "step": 1499 }, { "epoch": 2.71, "learning_rate": 2.390182379909955e-09, "logits/chosen": -2.4422757625579834, "logits/rejected": -2.4424548149108887, "logps/chosen": -0.8785430192947388, "logps/rejected": -2.722381830215454, "loss": 0.656, "rewards/accuracies": 1.0, "rewards/chosen": 0.5595220327377319, "rewards/margins": 0.15449807047843933, "rewards/rejected": 0.4050239622592926, "step": 1500 }, { "epoch": 2.71, "learning_rate": 2.3604498227813407e-09, "logits/chosen": -2.3348348140716553, "logits/rejected": -2.3435139656066895, "logps/chosen": -3.5368847846984863, "logps/rejected": -1.0437798500061035, "loss": 0.6231, "rewards/accuracies": 0.0, "rewards/chosen": 0.4895448386669159, "rewards/margins": -0.06878849864006042, "rewards/rejected": 0.5583333373069763, "step": 1501 }, { "epoch": 2.72, "learning_rate": 2.3308988820981034e-09, "logits/chosen": -2.3289947509765625, "logits/rejected": -2.334845542907715, "logps/chosen": -0.8140456080436707, "logps/rejected": -3.9998974800109863, "loss": 0.4982, "rewards/accuracies": 1.0, "rewards/chosen": 0.8754568099975586, "rewards/margins": 0.46250632405281067, "rewards/rejected": 0.4129504859447479, "step": 1502 }, { "epoch": 2.72, "learning_rate": 2.301529670517416e-09, "logits/chosen": -2.3203935623168945, "logits/rejected": -2.335005283355713, "logps/chosen": -1.1781368255615234, "logps/rejected": -12.236945152282715, "loss": 0.6235, "rewards/accuracies": 1.0, "rewards/chosen": 0.5267292261123657, "rewards/margins": 0.2782931327819824, "rewards/rejected": 0.2484360784292221, "step": 1503 }, { "epoch": 2.72, "learning_rate": 2.272342300003638e-09, "logits/chosen": -2.2174787521362305, "logits/rejected": -2.213329553604126, "logps/chosen": -1.7731719017028809, "logps/rejected": -5.220693588256836, "loss": 0.5461, "rewards/accuracies": 1.0, "rewards/chosen": 1.055177092552185, "rewards/margins": 0.39659231901168823, "rewards/rejected": 0.6585847735404968, "step": 1504 }, { "epoch": 2.72, "learning_rate": 2.2433368818278896e-09, "logits/chosen": -2.329007148742676, "logits/rejected": -2.3776443004608154, "logps/chosen": -0.7319153547286987, "logps/rejected": -12.960281372070312, "loss": 0.6363, "rewards/accuracies": 0.0, "rewards/chosen": 0.47909364104270935, "rewards/margins": -0.08036968111991882, "rewards/rejected": 0.5594633221626282, "step": 1505 }, { "epoch": 2.72, "learning_rate": 2.214513526567635e-09, "logits/chosen": -2.350259304046631, "logits/rejected": -2.3601999282836914, "logps/chosen": -3.2106547355651855, "logps/rejected": -2.8526840209960938, "loss": 0.5486, "rewards/accuracies": 1.0, "rewards/chosen": 0.7692691683769226, "rewards/margins": 0.18392914533615112, "rewards/rejected": 0.5853400230407715, "step": 1506 }, { "epoch": 2.73, "learning_rate": 2.1858723441062876e-09, "logits/chosen": -2.3498692512512207, "logits/rejected": -2.350410223007202, "logps/chosen": -10.008383750915527, "logps/rejected": -4.261614799499512, "loss": 0.7599, "rewards/accuracies": 1.0, "rewards/chosen": 0.9672860503196716, "rewards/margins": 0.2919411063194275, "rewards/rejected": 0.6753449440002441, "step": 1507 }, { "epoch": 2.73, "learning_rate": 2.157413443632716e-09, "logits/chosen": -2.312685012817383, "logits/rejected": -2.315855026245117, "logps/chosen": -5.8746209144592285, "logps/rejected": -6.8768510818481445, "loss": 0.4128, "rewards/accuracies": 1.0, "rewards/chosen": 1.0183231830596924, "rewards/margins": 0.8291822075843811, "rewards/rejected": 0.18914099037647247, "step": 1508 }, { "epoch": 2.73, "learning_rate": 2.1291369336409183e-09, "logits/chosen": -2.333772659301758, "logits/rejected": -2.3291361331939697, "logps/chosen": -5.527935028076172, "logps/rejected": -7.072075366973877, "loss": 0.6392, "rewards/accuracies": 1.0, "rewards/chosen": 1.1761242151260376, "rewards/margins": 0.5295275449752808, "rewards/rejected": 0.6465966701507568, "step": 1509 }, { "epoch": 2.73, "learning_rate": 2.1010429219295413e-09, "logits/chosen": -2.34031081199646, "logits/rejected": -2.3187527656555176, "logps/chosen": -3.6864655017852783, "logps/rejected": 0.0, "loss": 0.3041, "rewards/accuracies": 1.0, "rewards/chosen": 0.7030135989189148, "rewards/margins": 0.7030135989189148, "rewards/rejected": 0.0, "step": 1510 }, { "epoch": 2.73, "learning_rate": 2.073131515601484e-09, "logits/chosen": -2.3523948192596436, "logits/rejected": -2.3548765182495117, "logps/chosen": -2.5464391708374023, "logps/rejected": -2.7100934982299805, "loss": 0.8291, "rewards/accuracies": 1.0, "rewards/chosen": 0.7214941382408142, "rewards/margins": 0.05016547441482544, "rewards/rejected": 0.6713286638259888, "step": 1511 }, { "epoch": 2.73, "learning_rate": 2.04540282106353e-09, "logits/chosen": -2.33246111869812, "logits/rejected": -2.336871385574341, "logps/chosen": -2.5091311931610107, "logps/rejected": -1.085760235786438, "loss": 0.5402, "rewards/accuracies": 1.0, "rewards/chosen": 0.6956111192703247, "rewards/margins": 0.07401877641677856, "rewards/rejected": 0.6215923428535461, "step": 1512 }, { "epoch": 2.74, "learning_rate": 2.017856944025881e-09, "logits/chosen": -2.3506884574890137, "logits/rejected": -2.464550018310547, "logps/chosen": -1.6429238319396973, "logps/rejected": -25.734004974365234, "loss": 0.5857, "rewards/accuracies": 1.0, "rewards/chosen": 0.647354781627655, "rewards/margins": 0.16355597972869873, "rewards/rejected": 0.4837988018989563, "step": 1513 }, { "epoch": 2.74, "learning_rate": 1.9904939895017846e-09, "logits/chosen": -2.303060293197632, "logits/rejected": -2.3222076892852783, "logps/chosen": -1.475577473640442, "logps/rejected": -33.947967529296875, "loss": 0.5655, "rewards/accuracies": 1.0, "rewards/chosen": 0.7276864051818848, "rewards/margins": 0.12546241283416748, "rewards/rejected": 0.6022239923477173, "step": 1514 }, { "epoch": 2.74, "learning_rate": 1.9633140618071475e-09, "logits/chosen": -2.4129574298858643, "logits/rejected": -2.417494773864746, "logps/chosen": -2.288188934326172, "logps/rejected": -7.685879707336426, "loss": 0.6377, "rewards/accuracies": 0.0, "rewards/chosen": 0.9284598231315613, "rewards/margins": -0.12489193677902222, "rewards/rejected": 1.0533517599105835, "step": 1515 }, { "epoch": 2.74, "learning_rate": 1.9363172645601276e-09, "logits/chosen": -2.2661237716674805, "logits/rejected": -2.2699899673461914, "logps/chosen": -2.360816240310669, "logps/rejected": -4.877577781677246, "loss": 0.4847, "rewards/accuracies": 1.0, "rewards/chosen": 0.8206543326377869, "rewards/margins": 0.3250781297683716, "rewards/rejected": 0.4955762028694153, "step": 1516 }, { "epoch": 2.74, "learning_rate": 1.9095037006807093e-09, "logits/chosen": -2.3435885906219482, "logits/rejected": -2.3178980350494385, "logps/chosen": -0.5756005048751831, "logps/rejected": 0.0, "loss": 0.5495, "rewards/accuracies": 1.0, "rewards/chosen": 0.5539804100990295, "rewards/margins": 0.5539804100990295, "rewards/rejected": 0.0, "step": 1517 }, { "epoch": 2.75, "learning_rate": 1.8828734723903506e-09, "logits/chosen": -2.343142509460449, "logits/rejected": -2.340787887573242, "logps/chosen": -7.00982141494751, "logps/rejected": -7.076488018035889, "loss": 0.6591, "rewards/accuracies": 1.0, "rewards/chosen": 0.4803175628185272, "rewards/margins": 0.15872317552566528, "rewards/rejected": 0.32159438729286194, "step": 1518 }, { "epoch": 2.75, "learning_rate": 1.8564266812115647e-09, "logits/chosen": -2.2787070274353027, "logits/rejected": -2.274578094482422, "logps/chosen": -4.245882034301758, "logps/rejected": -3.273293972015381, "loss": 0.4866, "rewards/accuracies": 1.0, "rewards/chosen": 0.9862882494926453, "rewards/margins": 0.4964904189109802, "rewards/rejected": 0.48979783058166504, "step": 1519 }, { "epoch": 2.75, "learning_rate": 1.8301634279675848e-09, "logits/chosen": -2.362887382507324, "logits/rejected": -2.365513324737549, "logps/chosen": -2.465996026992798, "logps/rejected": -8.580533981323242, "loss": 0.6746, "rewards/accuracies": 0.0, "rewards/chosen": 0.5522161722183228, "rewards/margins": -0.35795706510543823, "rewards/rejected": 0.910173237323761, "step": 1520 }, { "epoch": 2.75, "learning_rate": 1.8040838127818868e-09, "logits/chosen": -2.3043346405029297, "logits/rejected": -2.307460069656372, "logps/chosen": -2.299680709838867, "logps/rejected": -0.8169368505477905, "loss": 0.7673, "rewards/accuracies": 0.0, "rewards/chosen": 0.6489397883415222, "rewards/margins": -0.22905981540679932, "rewards/rejected": 0.8779996037483215, "step": 1521 }, { "epoch": 2.75, "learning_rate": 1.7781879350779072e-09, "logits/chosen": -2.429325580596924, "logits/rejected": -2.430220603942871, "logps/chosen": -2.123188018798828, "logps/rejected": -1.3390898704528809, "loss": 0.7624, "rewards/accuracies": 0.0, "rewards/chosen": 0.5133969187736511, "rewards/margins": -0.01987665891647339, "rewards/rejected": 0.5332735776901245, "step": 1522 }, { "epoch": 2.75, "learning_rate": 1.7524758935786032e-09, "logits/chosen": -2.2612252235412598, "logits/rejected": -2.3112876415252686, "logps/chosen": -1.4157050848007202, "logps/rejected": -29.33150863647461, "loss": 0.54, "rewards/accuracies": 1.0, "rewards/chosen": 0.8214956521987915, "rewards/margins": 0.45156577229499817, "rewards/rejected": 0.36992987990379333, "step": 1523 }, { "epoch": 2.76, "learning_rate": 1.7269477863060767e-09, "logits/chosen": -2.3244597911834717, "logits/rejected": -2.32210636138916, "logps/chosen": -1.2801156044006348, "logps/rejected": -2.049071788787842, "loss": 0.6196, "rewards/accuracies": 1.0, "rewards/chosen": 1.0282487869262695, "rewards/margins": 0.42326462268829346, "rewards/rejected": 0.6049841642379761, "step": 1524 }, { "epoch": 2.76, "learning_rate": 1.7016037105812565e-09, "logits/chosen": -2.3235528469085693, "logits/rejected": -2.3256969451904297, "logps/chosen": -1.1383297443389893, "logps/rejected": -9.098976135253906, "loss": 0.5413, "rewards/accuracies": 1.0, "rewards/chosen": 0.7112003564834595, "rewards/margins": 0.45198071002960205, "rewards/rejected": 0.2592196464538574, "step": 1525 }, { "epoch": 2.76, "learning_rate": 1.6764437630234385e-09, "logits/chosen": -2.3871233463287354, "logits/rejected": -2.394528388977051, "logps/chosen": -1.358134150505066, "logps/rejected": -4.561766147613525, "loss": 0.7529, "rewards/accuracies": 0.0, "rewards/chosen": 0.5794515013694763, "rewards/margins": -0.19053012132644653, "rewards/rejected": 0.7699816226959229, "step": 1526 }, { "epoch": 2.76, "learning_rate": 1.6514680395500024e-09, "logits/chosen": -2.3240303993225098, "logits/rejected": -2.2941720485687256, "logps/chosen": -1.4039899110794067, "logps/rejected": 0.0, "loss": 0.5144, "rewards/accuracies": 1.0, "rewards/chosen": 0.6987823843955994, "rewards/margins": 0.6987823843955994, "rewards/rejected": 0.0, "step": 1527 }, { "epoch": 2.76, "learning_rate": 1.6266766353759954e-09, "logits/chosen": -2.3195712566375732, "logits/rejected": -2.3265035152435303, "logps/chosen": -1.53440523147583, "logps/rejected": -4.554307460784912, "loss": 0.5799, "rewards/accuracies": 1.0, "rewards/chosen": 0.5942738652229309, "rewards/margins": 0.2546935975551605, "rewards/rejected": 0.3395802676677704, "step": 1528 }, { "epoch": 2.76, "learning_rate": 1.6020696450137926e-09, "logits/chosen": -2.3025224208831787, "logits/rejected": -2.3471386432647705, "logps/chosen": -7.36953067779541, "logps/rejected": -15.131745338439941, "loss": 0.597, "rewards/accuracies": 1.0, "rewards/chosen": 0.966209888458252, "rewards/margins": 0.4304977059364319, "rewards/rejected": 0.5357121825218201, "step": 1529 }, { "epoch": 2.77, "learning_rate": 1.5776471622727106e-09, "logits/chosen": -2.3000283241271973, "logits/rejected": -2.2980332374572754, "logps/chosen": -0.7704344391822815, "logps/rejected": -4.597259044647217, "loss": 0.7527, "rewards/accuracies": 1.0, "rewards/chosen": 0.3987615704536438, "rewards/margins": 0.11204871535301208, "rewards/rejected": 0.2867128551006317, "step": 1530 }, { "epoch": 2.77, "learning_rate": 1.553409280258683e-09, "logits/chosen": -2.2780232429504395, "logits/rejected": -2.281782627105713, "logps/chosen": -2.7558441162109375, "logps/rejected": -2.0976593494415283, "loss": 0.6457, "rewards/accuracies": 0.0, "rewards/chosen": 0.47210559248924255, "rewards/margins": -0.005284219980239868, "rewards/rejected": 0.4773898124694824, "step": 1531 }, { "epoch": 2.77, "learning_rate": 1.5293560913738735e-09, "logits/chosen": -2.2849972248077393, "logits/rejected": -2.310673713684082, "logps/chosen": 0.0, "logps/rejected": -2.3216748237609863, "loss": 0.9803, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": -1.045373558998108, "rewards/rejected": 1.045373558998108, "step": 1532 }, { "epoch": 2.77, "learning_rate": 1.5054876873163591e-09, "logits/chosen": -2.324399948120117, "logits/rejected": -2.3338310718536377, "logps/chosen": -1.5064233541488647, "logps/rejected": -10.207742691040039, "loss": 0.5876, "rewards/accuracies": 1.0, "rewards/chosen": 0.6003562808036804, "rewards/margins": 0.31466051936149597, "rewards/rejected": 0.28569576144218445, "step": 1533 }, { "epoch": 2.77, "learning_rate": 1.4818041590797526e-09, "logits/chosen": -2.3234076499938965, "logits/rejected": -2.3261706829071045, "logps/chosen": -1.2027966976165771, "logps/rejected": -1.9880118370056152, "loss": 0.6611, "rewards/accuracies": 1.0, "rewards/chosen": 0.6084786653518677, "rewards/margins": 0.0735476016998291, "rewards/rejected": 0.5349310636520386, "step": 1534 }, { "epoch": 2.78, "learning_rate": 1.4583055969528525e-09, "logits/chosen": -2.3732659816741943, "logits/rejected": -2.3720273971557617, "logps/chosen": -1.407424807548523, "logps/rejected": -7.503518581390381, "loss": 0.7209, "rewards/accuracies": 0.0, "rewards/chosen": 0.6301897168159485, "rewards/margins": -0.340079128742218, "rewards/rejected": 0.9702688455581665, "step": 1535 }, { "epoch": 2.78, "learning_rate": 1.434992090519327e-09, "logits/chosen": -2.4335172176361084, "logits/rejected": -2.448810577392578, "logps/chosen": -0.7040889859199524, "logps/rejected": -6.9473371505737305, "loss": 0.5635, "rewards/accuracies": 1.0, "rewards/chosen": 0.610575795173645, "rewards/margins": 0.22663843631744385, "rewards/rejected": 0.38393735885620117, "step": 1536 }, { "epoch": 2.78, "learning_rate": 1.4118637286573475e-09, "logits/chosen": -2.349527359008789, "logits/rejected": -2.3233225345611572, "logps/chosen": -2.3011841773986816, "logps/rejected": 0.0, "loss": 0.5508, "rewards/accuracies": 1.0, "rewards/chosen": 0.8454729318618774, "rewards/margins": 0.8454729318618774, "rewards/rejected": 0.0, "step": 1537 }, { "epoch": 2.78, "learning_rate": 1.388920599539256e-09, "logits/chosen": -2.340311050415039, "logits/rejected": -2.3427698612213135, "logps/chosen": -2.328380584716797, "logps/rejected": -4.772294521331787, "loss": 0.6029, "rewards/accuracies": 1.0, "rewards/chosen": 0.7367181777954102, "rewards/margins": 0.3782050609588623, "rewards/rejected": 0.35851311683654785, "step": 1538 }, { "epoch": 2.78, "learning_rate": 1.3661627906312368e-09, "logits/chosen": -2.2852044105529785, "logits/rejected": -2.281400203704834, "logps/chosen": -8.973540306091309, "logps/rejected": -4.665271759033203, "loss": 0.6571, "rewards/accuracies": 1.0, "rewards/chosen": 0.5272828936576843, "rewards/margins": 0.14276254177093506, "rewards/rejected": 0.38452035188674927, "step": 1539 }, { "epoch": 2.78, "learning_rate": 1.343590388692978e-09, "logits/chosen": -2.3286406993865967, "logits/rejected": -2.323526620864868, "logps/chosen": -2.918529510498047, "logps/rejected": -9.60845947265625, "loss": 0.795, "rewards/accuracies": 0.0, "rewards/chosen": 0.8981935381889343, "rewards/margins": -0.44585365056991577, "rewards/rejected": 1.34404718875885, "step": 1540 }, { "epoch": 2.79, "learning_rate": 1.3212034797773275e-09, "logits/chosen": -2.262483596801758, "logits/rejected": -2.272048234939575, "logps/chosen": -1.2301008701324463, "logps/rejected": -3.6340878009796143, "loss": 0.6807, "rewards/accuracies": 1.0, "rewards/chosen": 0.7024641036987305, "rewards/margins": 0.2808699607849121, "rewards/rejected": 0.42159414291381836, "step": 1541 }, { "epoch": 2.79, "learning_rate": 1.2990021492299997e-09, "logits/chosen": -2.3174691200256348, "logits/rejected": -2.2932963371276855, "logps/chosen": -1.8322312831878662, "logps/rejected": 0.0, "loss": 0.4656, "rewards/accuracies": 1.0, "rewards/chosen": 0.6055456399917603, "rewards/margins": 0.6055456399917603, "rewards/rejected": 0.0, "step": 1542 }, { "epoch": 2.79, "learning_rate": 1.2769864816892072e-09, "logits/chosen": -2.2983005046844482, "logits/rejected": -2.2915470600128174, "logps/chosen": -2.9316940307617188, "logps/rejected": -2.139450788497925, "loss": 0.6732, "rewards/accuracies": 0.0, "rewards/chosen": 0.7127413153648376, "rewards/margins": -0.0404549241065979, "rewards/rejected": 0.7531962394714355, "step": 1543 }, { "epoch": 2.79, "learning_rate": 1.2551565610853687e-09, "logits/chosen": -2.264735221862793, "logits/rejected": -2.425766944885254, "logps/chosen": -2.0727224349975586, "logps/rejected": -25.80571746826172, "loss": 0.5515, "rewards/accuracies": 1.0, "rewards/chosen": 0.6961755156517029, "rewards/margins": 0.34822818636894226, "rewards/rejected": 0.3479473292827606, "step": 1544 }, { "epoch": 2.79, "learning_rate": 1.2335124706407685e-09, "logits/chosen": -2.286219835281372, "logits/rejected": -2.289717435836792, "logps/chosen": -1.7140476703643799, "logps/rejected": -2.566236972808838, "loss": 0.6359, "rewards/accuracies": 1.0, "rewards/chosen": 0.8780898451805115, "rewards/margins": 0.3797900378704071, "rewards/rejected": 0.49829980731010437, "step": 1545 }, { "epoch": 2.8, "learning_rate": 1.2120542928692634e-09, "logits/chosen": -2.308279275894165, "logits/rejected": -2.3129329681396484, "logps/chosen": -3.8120570182800293, "logps/rejected": -3.1097311973571777, "loss": 0.6501, "rewards/accuracies": 0.0, "rewards/chosen": 0.5723831057548523, "rewards/margins": -0.013020098209381104, "rewards/rejected": 0.5854032039642334, "step": 1546 }, { "epoch": 2.8, "learning_rate": 1.1907821095759552e-09, "logits/chosen": -2.313884973526001, "logits/rejected": -2.3185551166534424, "logps/chosen": -2.85622501373291, "logps/rejected": -7.606881141662598, "loss": 0.8376, "rewards/accuracies": 0.0, "rewards/chosen": 0.6276332139968872, "rewards/margins": -0.24870872497558594, "rewards/rejected": 0.8763419389724731, "step": 1547 }, { "epoch": 2.8, "learning_rate": 1.1696960018568736e-09, "logits/chosen": -2.2317166328430176, "logits/rejected": -2.228731393814087, "logps/chosen": -1.762075662612915, "logps/rejected": -1.7726411819458008, "loss": 0.654, "rewards/accuracies": 1.0, "rewards/chosen": 0.7417234778404236, "rewards/margins": 0.29311832785606384, "rewards/rejected": 0.44860514998435974, "step": 1548 }, { "epoch": 2.8, "learning_rate": 1.1487960500986605e-09, "logits/chosen": -2.3380918502807617, "logits/rejected": -2.3586270809173584, "logps/chosen": -1.64430832862854, "logps/rejected": -23.716665267944336, "loss": 0.8263, "rewards/accuracies": 0.0, "rewards/chosen": 0.44975051283836365, "rewards/margins": -0.10311433672904968, "rewards/rejected": 0.5528648495674133, "step": 1549 }, { "epoch": 2.8, "learning_rate": 1.1280823339782808e-09, "logits/chosen": -2.179609537124634, "logits/rejected": -2.174636125564575, "logps/chosen": -0.9168002009391785, "logps/rejected": -3.443035840988159, "loss": 0.7514, "rewards/accuracies": 1.0, "rewards/chosen": 0.6548624634742737, "rewards/margins": 0.1799910068511963, "rewards/rejected": 0.4748714566230774, "step": 1550 }, { "epoch": 2.8, "learning_rate": 1.1075549324627287e-09, "logits/chosen": -2.227670192718506, "logits/rejected": -2.21833872795105, "logps/chosen": -8.046731948852539, "logps/rejected": -9.500625610351562, "loss": 0.7986, "rewards/accuracies": 0.0, "rewards/chosen": 0.49019843339920044, "rewards/margins": -0.33036601543426514, "rewards/rejected": 0.8205644488334656, "step": 1551 }, { "epoch": 2.81, "learning_rate": 1.087213923808694e-09, "logits/chosen": -2.2827675342559814, "logits/rejected": -2.2801008224487305, "logps/chosen": -8.57435417175293, "logps/rejected": -2.701741933822632, "loss": 0.6349, "rewards/accuracies": 1.0, "rewards/chosen": 1.0831003189086914, "rewards/margins": 0.48238885402679443, "rewards/rejected": 0.600711464881897, "step": 1552 }, { "epoch": 2.81, "learning_rate": 1.0670593855622856e-09, "logits/chosen": -2.2210371494293213, "logits/rejected": -2.226628303527832, "logps/chosen": -4.602593898773193, "logps/rejected": -2.103588342666626, "loss": 0.63, "rewards/accuracies": 1.0, "rewards/chosen": 0.7149525284767151, "rewards/margins": 0.19959789514541626, "rewards/rejected": 0.5153546333312988, "step": 1553 }, { "epoch": 2.81, "learning_rate": 1.0470913945587244e-09, "logits/chosen": -2.2428946495056152, "logits/rejected": -2.2910308837890625, "logps/chosen": -0.9948699474334717, "logps/rejected": -36.73668670654297, "loss": 0.8363, "rewards/accuracies": 0.0, "rewards/chosen": 0.508592426776886, "rewards/margins": -0.07646197080612183, "rewards/rejected": 0.5850543975830078, "step": 1554 }, { "epoch": 2.81, "learning_rate": 1.027310026922068e-09, "logits/chosen": -2.264256000518799, "logits/rejected": -2.2651455402374268, "logps/chosen": -2.060563564300537, "logps/rejected": -2.5966196060180664, "loss": 0.5556, "rewards/accuracies": 1.0, "rewards/chosen": 1.0588932037353516, "rewards/margins": 0.29190540313720703, "rewards/rejected": 0.7669878005981445, "step": 1555 }, { "epoch": 2.81, "learning_rate": 1.0077153580648977e-09, "logits/chosen": -2.345078468322754, "logits/rejected": -2.341057777404785, "logps/chosen": -4.386848449707031, "logps/rejected": -3.3874104022979736, "loss": 0.57, "rewards/accuracies": 1.0, "rewards/chosen": 0.6015384793281555, "rewards/margins": 0.22792309522628784, "rewards/rejected": 0.3736153841018677, "step": 1556 }, { "epoch": 2.82, "learning_rate": 9.883074626880429e-10, "logits/chosen": -2.2370078563690186, "logits/rejected": -2.231370210647583, "logps/chosen": -0.9891913533210754, "logps/rejected": -5.690295219421387, "loss": 0.685, "rewards/accuracies": 1.0, "rewards/chosen": 0.5102315545082092, "rewards/margins": 0.022490262985229492, "rewards/rejected": 0.48774129152297974, "step": 1557 }, { "epoch": 2.82, "learning_rate": 9.690864147803069e-10, "logits/chosen": -2.2657902240753174, "logits/rejected": -2.2509560585021973, "logps/chosen": -5.698957920074463, "logps/rejected": -7.082986831665039, "loss": 0.524, "rewards/accuracies": 0.0, "rewards/chosen": 0.44354939460754395, "rewards/margins": -0.013098478317260742, "rewards/rejected": 0.4566478729248047, "step": 1558 }, { "epoch": 2.82, "learning_rate": 9.500522876181693e-10, "logits/chosen": -2.3733444213867188, "logits/rejected": -2.3733327388763428, "logps/chosen": -1.5812549591064453, "logps/rejected": -6.072324752807617, "loss": 0.6787, "rewards/accuracies": 0.0, "rewards/chosen": 0.4657295346260071, "rewards/margins": -0.37211835384368896, "rewards/rejected": 0.837847888469696, "step": 1559 }, { "epoch": 2.82, "learning_rate": 9.312051537655074e-10, "logits/chosen": -2.260568857192993, "logits/rejected": -2.311786413192749, "logps/chosen": -1.8351138830184937, "logps/rejected": -24.508258819580078, "loss": 0.5585, "rewards/accuracies": 1.0, "rewards/chosen": 0.5530802011489868, "rewards/margins": 0.5644369125366211, "rewards/rejected": -0.011356735602021217, "step": 1560 }, { "epoch": 2.82, "learning_rate": 9.12545085073313e-10, "logits/chosen": -2.2038533687591553, "logits/rejected": -2.195369005203247, "logps/chosen": -1.5234506130218506, "logps/rejected": -4.118592262268066, "loss": 0.5647, "rewards/accuracies": 1.0, "rewards/chosen": 0.7374419569969177, "rewards/margins": 0.22491532564163208, "rewards/rejected": 0.5125266313552856, "step": 1561 }, { "epoch": 2.82, "learning_rate": 8.940721526794482e-10, "logits/chosen": -2.371781826019287, "logits/rejected": -2.3710057735443115, "logps/chosen": -0.7853111624717712, "logps/rejected": -6.202244758605957, "loss": 0.569, "rewards/accuracies": 1.0, "rewards/chosen": 0.7324095368385315, "rewards/margins": 0.3873341977596283, "rewards/rejected": 0.3450753390789032, "step": 1562 }, { "epoch": 2.83, "learning_rate": 8.75786427008346e-10, "logits/chosen": -2.349985361099243, "logits/rejected": -2.2980024814605713, "logps/chosen": -10.671462059020996, "logps/rejected": -4.6941375732421875, "loss": 0.5908, "rewards/accuracies": 0.0, "rewards/chosen": 0.4152278006076813, "rewards/margins": -0.16786053776741028, "rewards/rejected": 0.5830883383750916, "step": 1563 }, { "epoch": 2.83, "learning_rate": 8.57687977770749e-10, "logits/chosen": -2.3105196952819824, "logits/rejected": -2.3840649127960205, "logps/chosen": -12.862874031066895, "logps/rejected": -41.095497131347656, "loss": 0.3032, "rewards/accuracies": 1.0, "rewards/chosen": 1.3338344097137451, "rewards/margins": 1.5621123313903809, "rewards/rejected": -0.22827796638011932, "step": 1564 }, { "epoch": 2.83, "learning_rate": 8.397768739634492e-10, "logits/chosen": -2.3340189456939697, "logits/rejected": -2.3300979137420654, "logps/chosen": -7.535846710205078, "logps/rejected": -1.3554309606552124, "loss": 0.5716, "rewards/accuracies": 1.0, "rewards/chosen": 0.9438190460205078, "rewards/margins": 0.34745728969573975, "rewards/rejected": 0.5963617563247681, "step": 1565 }, { "epoch": 2.83, "learning_rate": 8.220531838690203e-10, "logits/chosen": -2.3133738040924072, "logits/rejected": -2.3946399688720703, "logps/chosen": -3.9594650268554688, "logps/rejected": -22.339801788330078, "loss": 0.3821, "rewards/accuracies": 1.0, "rewards/chosen": 0.7127212882041931, "rewards/margins": 0.9140499830245972, "rewards/rejected": -0.20132866501808167, "step": 1566 }, { "epoch": 2.83, "learning_rate": 8.045169750555413e-10, "logits/chosen": -2.3756284713745117, "logits/rejected": -2.378232002258301, "logps/chosen": -0.6186177730560303, "logps/rejected": -5.143754005432129, "loss": 0.6427, "rewards/accuracies": 1.0, "rewards/chosen": 0.7575713396072388, "rewards/margins": 0.1601613163948059, "rewards/rejected": 0.5974100232124329, "step": 1567 }, { "epoch": 2.84, "learning_rate": 7.871683143763907e-10, "logits/chosen": -2.3479158878326416, "logits/rejected": -2.324404001235962, "logps/chosen": -1.0394681692123413, "logps/rejected": 0.0, "loss": 0.5063, "rewards/accuracies": 1.0, "rewards/chosen": 0.6318559050559998, "rewards/margins": 0.6318559050559998, "rewards/rejected": 0.0, "step": 1568 }, { "epoch": 2.84, "learning_rate": 7.700072679699299e-10, "logits/chosen": -2.3217363357543945, "logits/rejected": -2.3012189865112305, "logps/chosen": -9.172832489013672, "logps/rejected": -2.6917481422424316, "loss": 0.6069, "rewards/accuracies": 1.0, "rewards/chosen": 1.0108288526535034, "rewards/margins": 0.51115882396698, "rewards/rejected": 0.49967002868652344, "step": 1569 }, { "epoch": 2.84, "learning_rate": 7.530339012592701e-10, "logits/chosen": -2.3100032806396484, "logits/rejected": -2.318892240524292, "logps/chosen": -4.794817924499512, "logps/rejected": -2.4304542541503906, "loss": 0.6868, "rewards/accuracies": 1.0, "rewards/chosen": 0.6420685648918152, "rewards/margins": 0.18017739057540894, "rewards/rejected": 0.46189117431640625, "step": 1570 }, { "epoch": 2.84, "learning_rate": 7.362482789520618e-10, "logits/chosen": -2.290712833404541, "logits/rejected": -2.2942330837249756, "logps/chosen": -4.822291851043701, "logps/rejected": -2.9683010578155518, "loss": 0.6221, "rewards/accuracies": 1.0, "rewards/chosen": 0.8642174005508423, "rewards/margins": 0.3039208650588989, "rewards/rejected": 0.5602965354919434, "step": 1571 }, { "epoch": 2.84, "learning_rate": 7.196504650401891e-10, "logits/chosen": -2.372036933898926, "logits/rejected": -2.3686630725860596, "logps/chosen": -1.116978645324707, "logps/rejected": -1.8186699151992798, "loss": 0.64, "rewards/accuracies": 0.0, "rewards/chosen": 0.5316922068595886, "rewards/margins": -0.12714457511901855, "rewards/rejected": 0.6588367819786072, "step": 1572 }, { "epoch": 2.84, "learning_rate": 7.032405227995753e-10, "logits/chosen": -2.3747923374176025, "logits/rejected": -2.3759877681732178, "logps/chosen": -3.4142634868621826, "logps/rejected": -3.2689778804779053, "loss": 0.5826, "rewards/accuracies": 1.0, "rewards/chosen": 0.7653332948684692, "rewards/margins": 0.12493306398391724, "rewards/rejected": 0.640400230884552, "step": 1573 }, { "epoch": 2.85, "learning_rate": 6.870185147898943e-10, "logits/chosen": -2.379702568054199, "logits/rejected": -2.4042818546295166, "logps/chosen": -6.6345133781433105, "logps/rejected": -11.594852447509766, "loss": 0.6566, "rewards/accuracies": 1.0, "rewards/chosen": 1.005157709121704, "rewards/margins": 0.04898554086685181, "rewards/rejected": 0.9561721682548523, "step": 1574 }, { "epoch": 2.85, "learning_rate": 6.709845028543715e-10, "logits/chosen": -2.4265964031219482, "logits/rejected": -2.425334930419922, "logps/chosen": -2.713179588317871, "logps/rejected": -1.6609292030334473, "loss": 0.5312, "rewards/accuracies": 1.0, "rewards/chosen": 0.5813414454460144, "rewards/margins": 0.022483408451080322, "rewards/rejected": 0.5588580369949341, "step": 1575 }, { "epoch": 2.85, "learning_rate": 6.551385481195438e-10, "logits/chosen": -2.257333517074585, "logits/rejected": -2.3185839653015137, "logps/chosen": -3.385096549987793, "logps/rejected": -10.400345802307129, "loss": 0.5619, "rewards/accuracies": 1.0, "rewards/chosen": 0.8434903025627136, "rewards/margins": 0.19035488367080688, "rewards/rejected": 0.6531354188919067, "step": 1576 }, { "epoch": 2.85, "learning_rate": 6.394807109950051e-10, "logits/chosen": -2.330110788345337, "logits/rejected": -2.344778060913086, "logps/chosen": -2.0982961654663086, "logps/rejected": -5.097617149353027, "loss": 0.6434, "rewards/accuracies": 0.0, "rewards/chosen": 0.7334157824516296, "rewards/margins": -0.04578918218612671, "rewards/rejected": 0.7792049646377563, "step": 1577 }, { "epoch": 2.85, "learning_rate": 6.2401105117319e-10, "logits/chosen": -2.4121925830841064, "logits/rejected": -2.4191768169403076, "logps/chosen": -1.9021615982055664, "logps/rejected": -2.534602642059326, "loss": 0.6742, "rewards/accuracies": 0.0, "rewards/chosen": 0.6305832862854004, "rewards/margins": -0.13516801595687866, "rewards/rejected": 0.765751302242279, "step": 1578 }, { "epoch": 2.86, "learning_rate": 6.087296276291342e-10, "logits/chosen": -2.275347948074341, "logits/rejected": -2.2798774242401123, "logps/chosen": -5.5966973304748535, "logps/rejected": -2.8438761234283447, "loss": 0.5828, "rewards/accuracies": 0.0, "rewards/chosen": 0.44236475229263306, "rewards/margins": -0.2709035277366638, "rewards/rejected": 0.7132682800292969, "step": 1579 }, { "epoch": 2.86, "learning_rate": 5.936364986202757e-10, "logits/chosen": -2.3421401977539062, "logits/rejected": -2.405679702758789, "logps/chosen": -1.8095619678497314, "logps/rejected": -19.85068702697754, "loss": 0.615, "rewards/accuracies": 1.0, "rewards/chosen": 0.5798997282981873, "rewards/margins": 0.10129693150520325, "rewards/rejected": 0.478602796792984, "step": 1580 }, { "epoch": 2.86, "learning_rate": 5.787317216862153e-10, "logits/chosen": -2.461960554122925, "logits/rejected": -2.397321939468384, "logps/chosen": -19.605987548828125, "logps/rejected": -3.1645569801330566, "loss": 0.7266, "rewards/accuracies": 0.0, "rewards/chosen": 0.02538013458251953, "rewards/margins": -0.5731340050697327, "rewards/rejected": 0.5985141396522522, "step": 1581 }, { "epoch": 2.86, "learning_rate": 5.640153536484837e-10, "logits/chosen": -2.3379039764404297, "logits/rejected": -2.324821710586548, "logps/chosen": -5.157507419586182, "logps/rejected": -3.093364715576172, "loss": 0.4622, "rewards/accuracies": 1.0, "rewards/chosen": 0.9637823104858398, "rewards/margins": 0.4413006901741028, "rewards/rejected": 0.5224816203117371, "step": 1582 }, { "epoch": 2.86, "learning_rate": 5.49487450610353e-10, "logits/chosen": -2.2312827110290527, "logits/rejected": -2.2226290702819824, "logps/chosen": -3.6446664333343506, "logps/rejected": -2.6914520263671875, "loss": 0.6253, "rewards/accuracies": 1.0, "rewards/chosen": 0.8173613548278809, "rewards/margins": 0.1104390025138855, "rewards/rejected": 0.7069223523139954, "step": 1583 }, { "epoch": 2.86, "learning_rate": 5.351480679566034e-10, "logits/chosen": -2.369542360305786, "logits/rejected": -2.3637349605560303, "logps/chosen": -3.5756032466888428, "logps/rejected": -2.552639961242676, "loss": 0.5609, "rewards/accuracies": 1.0, "rewards/chosen": 0.7782676815986633, "rewards/margins": 0.3147169053554535, "rewards/rejected": 0.46355077624320984, "step": 1584 }, { "epoch": 2.87, "learning_rate": 5.209972603533286e-10, "logits/chosen": -2.2712619304656982, "logits/rejected": -2.411959409713745, "logps/chosen": -8.089130401611328, "logps/rejected": -33.252925872802734, "loss": 0.7725, "rewards/accuracies": 0.0, "rewards/chosen": 0.6075855493545532, "rewards/margins": -0.1494903564453125, "rewards/rejected": 0.7570759057998657, "step": 1585 }, { "epoch": 2.87, "learning_rate": 5.070350817476976e-10, "logits/chosen": -2.2788615226745605, "logits/rejected": -2.268573522567749, "logps/chosen": -3.7292842864990234, "logps/rejected": -2.956198215484619, "loss": 0.5793, "rewards/accuracies": 1.0, "rewards/chosen": 0.754533588886261, "rewards/margins": 0.37132954597473145, "rewards/rejected": 0.38320404291152954, "step": 1586 }, { "epoch": 2.87, "learning_rate": 4.932615853677879e-10, "logits/chosen": -2.3404102325439453, "logits/rejected": -2.3385653495788574, "logps/chosen": -0.8689107894897461, "logps/rejected": -2.737919569015503, "loss": 0.6679, "rewards/accuracies": 1.0, "rewards/chosen": 0.5716001391410828, "rewards/margins": 0.10360047221183777, "rewards/rejected": 0.467999666929245, "step": 1587 }, { "epoch": 2.87, "learning_rate": 4.796768237223414e-10, "logits/chosen": -2.184422731399536, "logits/rejected": -2.225377321243286, "logps/chosen": -6.710679054260254, "logps/rejected": -20.47834014892578, "loss": 0.5648, "rewards/accuracies": 1.0, "rewards/chosen": 0.9954472780227661, "rewards/margins": 0.6664725542068481, "rewards/rejected": 0.32897472381591797, "step": 1588 }, { "epoch": 2.87, "learning_rate": 4.662808486006086e-10, "logits/chosen": -2.248272657394409, "logits/rejected": -2.2522363662719727, "logps/chosen": -6.032689094543457, "logps/rejected": -4.552128791809082, "loss": 0.67, "rewards/accuracies": 1.0, "rewards/chosen": 1.0854805707931519, "rewards/margins": 0.5509078502655029, "rewards/rejected": 0.5345727205276489, "step": 1589 }, { "epoch": 2.88, "learning_rate": 4.5307371107211613e-10, "logits/chosen": -2.295001983642578, "logits/rejected": -2.294083595275879, "logps/chosen": -3.6275441646575928, "logps/rejected": -3.747837543487549, "loss": 0.6886, "rewards/accuracies": 1.0, "rewards/chosen": 0.6088722348213196, "rewards/margins": 0.03410661220550537, "rewards/rejected": 0.5747656226158142, "step": 1590 }, { "epoch": 2.88, "learning_rate": 4.4005546148649373e-10, "logits/chosen": -2.232187271118164, "logits/rejected": -2.198674440383911, "logps/chosen": -2.1084344387054443, "logps/rejected": 0.0, "loss": 0.5422, "rewards/accuracies": 1.0, "rewards/chosen": 0.7289373278617859, "rewards/margins": 0.7289373278617859, "rewards/rejected": 0.0, "step": 1591 }, { "epoch": 2.88, "learning_rate": 4.272261494732532e-10, "logits/chosen": -2.2897071838378906, "logits/rejected": -2.2895710468292236, "logps/chosen": -1.1811151504516602, "logps/rejected": -5.790553092956543, "loss": 0.5547, "rewards/accuracies": 1.0, "rewards/chosen": 0.6391497850418091, "rewards/margins": 0.2885204553604126, "rewards/rejected": 0.3506293296813965, "step": 1592 }, { "epoch": 2.88, "learning_rate": 4.145858239416378e-10, "logits/chosen": -2.2972426414489746, "logits/rejected": -2.303434371948242, "logps/chosen": -1.4235121011734009, "logps/rejected": -2.977663516998291, "loss": 0.5617, "rewards/accuracies": 1.0, "rewards/chosen": 0.6473793387413025, "rewards/margins": 0.24976742267608643, "rewards/rejected": 0.39761191606521606, "step": 1593 }, { "epoch": 2.88, "learning_rate": 4.02134533080406e-10, "logits/chosen": -2.3054518699645996, "logits/rejected": -2.308572292327881, "logps/chosen": -13.964836120605469, "logps/rejected": -7.109671115875244, "loss": 0.8495, "rewards/accuracies": 1.0, "rewards/chosen": 0.8924760818481445, "rewards/margins": 0.22719818353652954, "rewards/rejected": 0.665277898311615, "step": 1594 }, { "epoch": 2.88, "learning_rate": 3.898723243576707e-10, "logits/chosen": -2.3638272285461426, "logits/rejected": -2.3820481300354004, "logps/chosen": -3.807896375656128, "logps/rejected": -8.756929397583008, "loss": 0.595, "rewards/accuracies": 1.0, "rewards/chosen": 0.7704567313194275, "rewards/margins": 0.32845672965049744, "rewards/rejected": 0.44200000166893005, "step": 1595 }, { "epoch": 2.89, "learning_rate": 3.777992445206934e-10, "logits/chosen": -2.323349714279175, "logits/rejected": -2.3232271671295166, "logps/chosen": -1.8336189985275269, "logps/rejected": -2.9673731327056885, "loss": 0.6941, "rewards/accuracies": 0.0, "rewards/chosen": 0.5110665559768677, "rewards/margins": -0.13660019636154175, "rewards/rejected": 0.6476667523384094, "step": 1596 }, { "epoch": 2.89, "learning_rate": 3.6591533959572927e-10, "logits/chosen": -2.459925651550293, "logits/rejected": -2.5759310722351074, "logps/chosen": -3.1785054206848145, "logps/rejected": -18.617366790771484, "loss": 0.5355, "rewards/accuracies": 1.0, "rewards/chosen": 0.8021432757377625, "rewards/margins": 0.7578104138374329, "rewards/rejected": 0.04433288797736168, "step": 1597 }, { "epoch": 2.89, "learning_rate": 3.5422065488782683e-10, "logits/chosen": -2.3821067810058594, "logits/rejected": -2.389760971069336, "logps/chosen": -2.2097721099853516, "logps/rejected": -8.82040023803711, "loss": 0.676, "rewards/accuracies": 1.0, "rewards/chosen": 0.9002619981765747, "rewards/margins": 0.5522836446762085, "rewards/rejected": 0.3479783236980438, "step": 1598 }, { "epoch": 2.89, "learning_rate": 3.427152349806894e-10, "logits/chosen": -2.3693625926971436, "logits/rejected": -2.3683698177337646, "logps/chosen": -0.8713809251785278, "logps/rejected": -10.107380867004395, "loss": 0.4984, "rewards/accuracies": 1.0, "rewards/chosen": 0.6882097721099854, "rewards/margins": 0.48312926292419434, "rewards/rejected": 0.20508050918579102, "step": 1599 }, { "epoch": 2.89, "learning_rate": 3.313991237364644e-10, "logits/chosen": -2.398867607116699, "logits/rejected": -2.3859293460845947, "logps/chosen": -3.6965620517730713, "logps/rejected": -5.050409317016602, "loss": 0.825, "rewards/accuracies": 0.0, "rewards/chosen": 0.703393280506134, "rewards/margins": -0.29956871271133423, "rewards/rejected": 1.0029619932174683, "step": 1600 }, { "epoch": 2.9, "learning_rate": 3.202723642956151e-10, "logits/chosen": -2.2702996730804443, "logits/rejected": -2.278496265411377, "logps/chosen": -1.4843987226486206, "logps/rejected": -3.264647960662842, "loss": 0.6132, "rewards/accuracies": 1.0, "rewards/chosen": 0.6156713962554932, "rewards/margins": 0.01646709442138672, "rewards/rejected": 0.5992043018341064, "step": 1601 }, { "epoch": 2.9, "learning_rate": 3.093349990767269e-10, "logits/chosen": -2.2783730030059814, "logits/rejected": -2.2774901390075684, "logps/chosen": -2.790562629699707, "logps/rejected": -2.803314447402954, "loss": 0.5946, "rewards/accuracies": 0.0, "rewards/chosen": 0.42548590898513794, "rewards/margins": -0.18799084424972534, "rewards/rejected": 0.6134767532348633, "step": 1602 }, { "epoch": 2.9, "learning_rate": 2.985870697763682e-10, "logits/chosen": -2.2327816486358643, "logits/rejected": -2.227085590362549, "logps/chosen": -2.6246161460876465, "logps/rejected": -9.495782852172852, "loss": 0.5131, "rewards/accuracies": 1.0, "rewards/chosen": 0.7218607664108276, "rewards/margins": 0.5589028000831604, "rewards/rejected": 0.16295795142650604, "step": 1603 }, { "epoch": 2.9, "learning_rate": 2.8802861736890173e-10, "logits/chosen": -2.2552671432495117, "logits/rejected": -2.328828811645508, "logps/chosen": -2.0306808948516846, "logps/rejected": -23.026458740234375, "loss": 0.5813, "rewards/accuracies": 1.0, "rewards/chosen": 0.635700523853302, "rewards/margins": 0.15713950991630554, "rewards/rejected": 0.47856101393699646, "step": 1604 }, { "epoch": 2.9, "learning_rate": 2.7765968210635703e-10, "logits/chosen": -2.290004253387451, "logits/rejected": -2.291579484939575, "logps/chosen": -4.927360534667969, "logps/rejected": -6.292045593261719, "loss": 0.6993, "rewards/accuracies": 1.0, "rewards/chosen": 1.1445482969284058, "rewards/margins": 0.15399062633514404, "rewards/rejected": 0.9905576705932617, "step": 1605 }, { "epoch": 2.9, "learning_rate": 2.674803035182749e-10, "logits/chosen": -2.3611843585968018, "logits/rejected": -2.363219738006592, "logps/chosen": -1.5267887115478516, "logps/rejected": -3.22184419631958, "loss": 0.6577, "rewards/accuracies": 1.0, "rewards/chosen": 0.8367885947227478, "rewards/margins": 0.08848273754119873, "rewards/rejected": 0.7483058571815491, "step": 1606 }, { "epoch": 2.91, "learning_rate": 2.5749052041153517e-10, "logits/chosen": -2.224838972091675, "logits/rejected": -2.221296548843384, "logps/chosen": -1.270156741142273, "logps/rejected": -6.872069358825684, "loss": 0.6699, "rewards/accuracies": 1.0, "rewards/chosen": 0.5274975895881653, "rewards/margins": 0.015677154064178467, "rewards/rejected": 0.5118204355239868, "step": 1607 }, { "epoch": 2.91, "learning_rate": 2.4769037087022936e-10, "logits/chosen": -2.2564165592193604, "logits/rejected": -2.246375799179077, "logps/chosen": -1.745493769645691, "logps/rejected": -5.473581314086914, "loss": 0.6558, "rewards/accuracies": 0.0, "rewards/chosen": 0.6264331936836243, "rewards/margins": -0.14966171979904175, "rewards/rejected": 0.776094913482666, "step": 1608 }, { "epoch": 2.91, "learning_rate": 2.380798922555105e-10, "logits/chosen": -2.351309061050415, "logits/rejected": -2.355508804321289, "logps/chosen": -1.4495882987976074, "logps/rejected": -2.508756160736084, "loss": 0.726, "rewards/accuracies": 1.0, "rewards/chosen": 0.8328763842582703, "rewards/margins": 0.24275678396224976, "rewards/rejected": 0.5901196002960205, "step": 1609 }, { "epoch": 2.91, "learning_rate": 2.286591212054323e-10, "logits/chosen": -2.301668643951416, "logits/rejected": -2.302562952041626, "logps/chosen": -1.4226630926132202, "logps/rejected": -2.887716293334961, "loss": 0.6518, "rewards/accuracies": 1.0, "rewards/chosen": 0.6801550388336182, "rewards/margins": 0.15888220071792603, "rewards/rejected": 0.5212728381156921, "step": 1610 }, { "epoch": 2.91, "learning_rate": 2.1942809363484914e-10, "logits/chosen": -2.333746910095215, "logits/rejected": -2.3343281745910645, "logps/chosen": -1.4045732021331787, "logps/rejected": -6.471251487731934, "loss": 0.6457, "rewards/accuracies": 1.0, "rewards/chosen": 0.8016433715820312, "rewards/margins": 0.46096792817115784, "rewards/rejected": 0.3406754434108734, "step": 1611 }, { "epoch": 2.92, "learning_rate": 2.103868447352386e-10, "logits/chosen": -2.3489925861358643, "logits/rejected": -2.346113920211792, "logps/chosen": -1.1203279495239258, "logps/rejected": -5.200836181640625, "loss": 0.6325, "rewards/accuracies": 1.0, "rewards/chosen": 0.48847922682762146, "rewards/margins": 0.03263223171234131, "rewards/rejected": 0.45584699511528015, "step": 1612 }, { "epoch": 2.92, "learning_rate": 2.0153540897459022e-10, "logits/chosen": -2.250472068786621, "logits/rejected": -2.257097005844116, "logps/chosen": -1.4366638660430908, "logps/rejected": -2.547555923461914, "loss": 0.7414, "rewards/accuracies": 1.0, "rewards/chosen": 0.6890531778335571, "rewards/margins": 0.1817811131477356, "rewards/rejected": 0.5072720646858215, "step": 1613 }, { "epoch": 2.92, "learning_rate": 1.9287382009726683e-10, "logits/chosen": -2.3515357971191406, "logits/rejected": -2.3236324787139893, "logps/chosen": -1.3665852546691895, "logps/rejected": 0.0, "loss": 0.6021, "rewards/accuracies": 1.0, "rewards/chosen": 0.6902549266815186, "rewards/margins": 0.6902549266815186, "rewards/rejected": 0.0, "step": 1614 }, { "epoch": 2.92, "learning_rate": 1.8440211112388249e-10, "logits/chosen": -2.1472573280334473, "logits/rejected": -2.154637336730957, "logps/chosen": -0.5222416520118713, "logps/rejected": -4.071493148803711, "loss": 0.6394, "rewards/accuracies": 1.0, "rewards/chosen": 0.6999755501747131, "rewards/margins": 0.2886216640472412, "rewards/rejected": 0.4113538861274719, "step": 1615 }, { "epoch": 2.92, "learning_rate": 1.761203143511636e-10, "logits/chosen": -2.184321641921997, "logits/rejected": -2.1837682723999023, "logps/chosen": -1.4073219299316406, "logps/rejected": -6.980435848236084, "loss": 0.5333, "rewards/accuracies": 1.0, "rewards/chosen": 0.6069874167442322, "rewards/margins": 0.35256144404411316, "rewards/rejected": 0.254425972700119, "step": 1616 }, { "epoch": 2.92, "learning_rate": 1.680284613518379e-10, "logits/chosen": -2.2397446632385254, "logits/rejected": -2.241987466812134, "logps/chosen": -6.504635810852051, "logps/rejected": -5.705074310302734, "loss": 0.6879, "rewards/accuracies": 1.0, "rewards/chosen": 0.49716416001319885, "rewards/margins": 0.021257877349853516, "rewards/rejected": 0.47590628266334534, "step": 1617 }, { "epoch": 2.93, "learning_rate": 1.6012658297450688e-10, "logits/chosen": -2.283015489578247, "logits/rejected": -2.2879185676574707, "logps/chosen": -4.852474212646484, "logps/rejected": -5.924814224243164, "loss": 0.5997, "rewards/accuracies": 1.0, "rewards/chosen": 0.559985339641571, "rewards/margins": 0.21836316585540771, "rewards/rejected": 0.34162217378616333, "step": 1618 }, { "epoch": 2.93, "learning_rate": 1.5241470934352906e-10, "logits/chosen": -2.328512668609619, "logits/rejected": -2.332659959793091, "logps/chosen": -1.9073528051376343, "logps/rejected": -1.7487967014312744, "loss": 0.6112, "rewards/accuracies": 1.0, "rewards/chosen": 0.5363285541534424, "rewards/margins": 0.05895999073982239, "rewards/rejected": 0.47736856341362, "step": 1619 }, { "epoch": 2.93, "learning_rate": 1.448928698589147e-10, "logits/chosen": -2.195228338241577, "logits/rejected": -2.2147727012634277, "logps/chosen": -1.6663910150527954, "logps/rejected": -12.701762199401855, "loss": 0.5326, "rewards/accuracies": 1.0, "rewards/chosen": 0.721141517162323, "rewards/margins": 0.3734802007675171, "rewards/rejected": 0.3476613163948059, "step": 1620 }, { "epoch": 2.93, "learning_rate": 1.3756109319620346e-10, "logits/chosen": -2.417809009552002, "logits/rejected": -2.421996831893921, "logps/chosen": -1.7359291315078735, "logps/rejected": -10.422470092773438, "loss": 0.7098, "rewards/accuracies": 0.0, "rewards/chosen": 0.8798902630805969, "rewards/margins": -0.2815791964530945, "rewards/rejected": 1.1614694595336914, "step": 1621 }, { "epoch": 2.93, "learning_rate": 1.3041940730635358e-10, "logits/chosen": -2.115511417388916, "logits/rejected": -2.106628179550171, "logps/chosen": -2.4688148498535156, "logps/rejected": -5.973416805267334, "loss": 0.7438, "rewards/accuracies": 0.0, "rewards/chosen": 0.5280908942222595, "rewards/margins": -0.23930275440216064, "rewards/rejected": 0.7673936486244202, "step": 1622 }, { "epoch": 2.93, "learning_rate": 1.2346783941564741e-10, "logits/chosen": -2.3712034225463867, "logits/rejected": -2.3436968326568604, "logps/chosen": -2.3793857097625732, "logps/rejected": 0.0, "loss": 0.6023, "rewards/accuracies": 1.0, "rewards/chosen": 0.7670039534568787, "rewards/margins": 0.7670039534568787, "rewards/rejected": 0.0, "step": 1623 }, { "epoch": 2.94, "learning_rate": 1.167064160255693e-10, "logits/chosen": -2.2510645389556885, "logits/rejected": -2.399339199066162, "logps/chosen": -2.933655261993408, "logps/rejected": -28.226736068725586, "loss": 0.634, "rewards/accuracies": 1.0, "rewards/chosen": 0.5379738211631775, "rewards/margins": 0.3055467903614044, "rewards/rejected": 0.23242703080177307, "step": 1624 }, { "epoch": 2.94, "learning_rate": 1.1013516291272229e-10, "logits/chosen": -2.4031054973602295, "logits/rejected": -2.399289846420288, "logps/chosen": -6.516195297241211, "logps/rejected": -7.0836262702941895, "loss": 0.7335, "rewards/accuracies": 0.0, "rewards/chosen": 1.0361543893814087, "rewards/margins": -0.16377508640289307, "rewards/rejected": 1.1999294757843018, "step": 1625 }, { "epoch": 2.94, "learning_rate": 1.0375410512871719e-10, "logits/chosen": -2.304626226425171, "logits/rejected": -2.3078489303588867, "logps/chosen": -6.250345706939697, "logps/rejected": -0.8326199054718018, "loss": 0.7913, "rewards/accuracies": 1.0, "rewards/chosen": 0.704943060874939, "rewards/margins": 0.09648454189300537, "rewards/rejected": 0.6084585189819336, "step": 1626 }, { "epoch": 2.94, "learning_rate": 9.756326700009477e-11, "logits/chosen": -2.257380723953247, "logits/rejected": -2.256568193435669, "logps/chosen": -0.5965788960456848, "logps/rejected": -2.3043036460876465, "loss": 0.6376, "rewards/accuracies": 1.0, "rewards/chosen": 0.6758427023887634, "rewards/margins": 0.2359551191329956, "rewards/rejected": 0.4398875832557678, "step": 1627 }, { "epoch": 2.94, "learning_rate": 9.156267212820367e-11, "logits/chosen": -2.45835542678833, "logits/rejected": -2.462904930114746, "logps/chosen": -2.3791284561157227, "logps/rejected": -1.6425801515579224, "loss": 0.5538, "rewards/accuracies": 1.0, "rewards/chosen": 0.7397290468215942, "rewards/margins": 0.13988536596298218, "rewards/rejected": 0.5998436808586121, "step": 1628 }, { "epoch": 2.95, "learning_rate": 8.575234338913939e-11, "logits/chosen": -2.32434344291687, "logits/rejected": -2.3692169189453125, "logps/chosen": -1.7905926704406738, "logps/rejected": -25.006099700927734, "loss": 0.8645, "rewards/accuracies": 0.0, "rewards/chosen": 0.4963183104991913, "rewards/margins": -0.20010808110237122, "rewards/rejected": 0.6964263916015625, "step": 1629 }, { "epoch": 2.95, "learning_rate": 8.01323029336387e-11, "logits/chosen": -2.37489652633667, "logits/rejected": -2.371920585632324, "logps/chosen": -4.786995887756348, "logps/rejected": -3.5085134506225586, "loss": 0.6296, "rewards/accuracies": 1.0, "rewards/chosen": 0.7961384654045105, "rewards/margins": 0.29654720425605774, "rewards/rejected": 0.49959126114845276, "step": 1630 }, { "epoch": 2.95, "learning_rate": 7.470257218700759e-11, "logits/chosen": -2.317533493041992, "logits/rejected": -2.3673911094665527, "logps/chosen": -0.8211085796356201, "logps/rejected": -8.297365188598633, "loss": 0.7103, "rewards/accuracies": 0.0, "rewards/chosen": 0.5335419774055481, "rewards/margins": -0.39570438861846924, "rewards/rejected": 0.9292463660240173, "step": 1631 }, { "epoch": 2.95, "learning_rate": 6.946317184902128e-11, "logits/chosen": -2.3738536834716797, "logits/rejected": -2.3638949394226074, "logps/chosen": -7.3828911781311035, "logps/rejected": -1.8896480798721313, "loss": 0.5469, "rewards/accuracies": 1.0, "rewards/chosen": 1.0122088193893433, "rewards/margins": 0.3671717047691345, "rewards/rejected": 0.6450371146202087, "step": 1632 }, { "epoch": 2.95, "learning_rate": 6.441412189387985e-11, "logits/chosen": -2.356393337249756, "logits/rejected": -2.3555219173431396, "logps/chosen": -1.4930074214935303, "logps/rejected": -1.6068189144134521, "loss": 0.6161, "rewards/accuracies": 1.0, "rewards/chosen": 1.1572895050048828, "rewards/margins": 0.33650290966033936, "rewards/rejected": 0.8207865953445435, "step": 1633 }, { "epoch": 2.95, "learning_rate": 5.955544157008052e-11, "logits/chosen": -2.2872211933135986, "logits/rejected": -2.28135347366333, "logps/chosen": -8.981513977050781, "logps/rejected": -2.7129433155059814, "loss": 0.6629, "rewards/accuracies": 1.0, "rewards/chosen": 0.6621743440628052, "rewards/margins": 0.03515046834945679, "rewards/rejected": 0.6270238757133484, "step": 1634 }, { "epoch": 2.96, "learning_rate": 5.488714940040662e-11, "logits/chosen": -2.3593013286590576, "logits/rejected": -2.3817644119262695, "logps/chosen": -5.748210906982422, "logps/rejected": -13.930611610412598, "loss": 0.7611, "rewards/accuracies": 0.0, "rewards/chosen": 0.5693023800849915, "rewards/margins": -0.20186030864715576, "rewards/rejected": 0.7711626887321472, "step": 1635 }, { "epoch": 2.96, "learning_rate": 5.040926318179428e-11, "logits/chosen": -2.370925188064575, "logits/rejected": -2.463911771774292, "logps/chosen": -1.8469617366790771, "logps/rejected": -26.839313507080078, "loss": 0.577, "rewards/accuracies": 1.0, "rewards/chosen": 0.5244914889335632, "rewards/margins": 0.2473752796649933, "rewards/rejected": 0.27711620926856995, "step": 1636 }, { "epoch": 2.96, "learning_rate": 4.6121799985310297e-11, "logits/chosen": -2.426229238510132, "logits/rejected": -2.4319865703582764, "logps/chosen": -2.4126884937286377, "logps/rejected": -2.108490467071533, "loss": 0.6234, "rewards/accuracies": 1.0, "rewards/chosen": 0.9700350761413574, "rewards/margins": 0.30142396688461304, "rewards/rejected": 0.6686111092567444, "step": 1637 }, { "epoch": 2.96, "learning_rate": 4.202477615606881e-11, "logits/chosen": -2.229367971420288, "logits/rejected": -2.2374300956726074, "logps/chosen": -3.9267992973327637, "logps/rejected": -5.948910713195801, "loss": 0.6344, "rewards/accuracies": 1.0, "rewards/chosen": 0.7274277210235596, "rewards/margins": 0.3721597194671631, "rewards/rejected": 0.3552680015563965, "step": 1638 }, { "epoch": 2.96, "learning_rate": 3.811820731317028e-11, "logits/chosen": -2.2769689559936523, "logits/rejected": -2.2765052318573, "logps/chosen": -4.267375469207764, "logps/rejected": -8.395048141479492, "loss": 0.6327, "rewards/accuracies": 0.0, "rewards/chosen": 0.9746460318565369, "rewards/margins": -0.25261324644088745, "rewards/rejected": 1.2272592782974243, "step": 1639 }, { "epoch": 2.97, "learning_rate": 3.440210834964041e-11, "logits/chosen": -2.260150909423828, "logits/rejected": -2.2580740451812744, "logps/chosen": -9.258129119873047, "logps/rejected": -2.186492443084717, "loss": 0.7019, "rewards/accuracies": 0.0, "rewards/chosen": 0.42913493514060974, "rewards/margins": -0.11529889702796936, "rewards/rejected": 0.5444338321685791, "step": 1640 }, { "epoch": 2.97, "learning_rate": 3.087649343238019e-11, "logits/chosen": -2.3556864261627197, "logits/rejected": -2.361332416534424, "logps/chosen": -3.024639368057251, "logps/rejected": -3.4080400466918945, "loss": 0.5962, "rewards/accuracies": 1.0, "rewards/chosen": 0.5819043517112732, "rewards/margins": 0.21122172474861145, "rewards/rejected": 0.37068262696266174, "step": 1641 }, { "epoch": 2.97, "learning_rate": 2.754137600209927e-11, "logits/chosen": -2.2399938106536865, "logits/rejected": -2.23110032081604, "logps/chosen": -3.1951937675476074, "logps/rejected": -2.037712574005127, "loss": 0.4364, "rewards/accuracies": 1.0, "rewards/chosen": 0.7560226321220398, "rewards/margins": 0.2992309033870697, "rewards/rejected": 0.4567917287349701, "step": 1642 }, { "epoch": 2.97, "learning_rate": 2.4396768773288222e-11, "logits/chosen": -2.348926305770874, "logits/rejected": -2.3441426753997803, "logps/chosen": -1.6841051578521729, "logps/rejected": -9.790316581726074, "loss": 0.5694, "rewards/accuracies": 1.0, "rewards/chosen": 0.8274971842765808, "rewards/margins": 0.5773276090621948, "rewards/rejected": 0.250169575214386, "step": 1643 }, { "epoch": 2.97, "learning_rate": 2.144268373414082e-11, "logits/chosen": -2.3626492023468018, "logits/rejected": -2.365602970123291, "logps/chosen": -1.5793521404266357, "logps/rejected": -0.9510278701782227, "loss": 0.725, "rewards/accuracies": 0.0, "rewards/chosen": 0.556318461894989, "rewards/margins": -0.10365509986877441, "rewards/rejected": 0.6599735617637634, "step": 1644 }, { "epoch": 2.97, "learning_rate": 1.8679132146526276e-11, "logits/chosen": -2.3608052730560303, "logits/rejected": -2.349673271179199, "logps/chosen": -4.724174499511719, "logps/rejected": -2.686227798461914, "loss": 0.7705, "rewards/accuracies": 1.0, "rewards/chosen": 0.746986985206604, "rewards/margins": 0.18843472003936768, "rewards/rejected": 0.5585522651672363, "step": 1645 }, { "epoch": 2.98, "learning_rate": 1.6106124545950395e-11, "logits/chosen": -2.3328042030334473, "logits/rejected": -2.30942964553833, "logps/chosen": -3.1458067893981934, "logps/rejected": 0.0, "loss": 0.4912, "rewards/accuracies": 1.0, "rewards/chosen": 0.9998283386230469, "rewards/margins": 0.9998283386230469, "rewards/rejected": 0.0, "step": 1646 }, { "epoch": 2.98, "learning_rate": 1.3723670741488946e-11, "logits/chosen": -2.198678731918335, "logits/rejected": -2.228160858154297, "logps/chosen": -1.6580069065093994, "logps/rejected": -5.113583564758301, "loss": 0.7188, "rewards/accuracies": 0.0, "rewards/chosen": 0.6254798173904419, "rewards/margins": -0.3766893148422241, "rewards/rejected": 1.002169132232666, "step": 1647 }, { "epoch": 2.98, "learning_rate": 1.1531779815787678e-11, "logits/chosen": -2.3547184467315674, "logits/rejected": -2.30319881439209, "logps/chosen": -9.165556907653809, "logps/rejected": -3.175917148590088, "loss": 0.6782, "rewards/accuracies": 1.0, "rewards/chosen": 0.8337355852127075, "rewards/margins": 0.4472106099128723, "rewards/rejected": 0.3865249752998352, "step": 1648 }, { "epoch": 2.98, "learning_rate": 9.530460124995699e-12, "logits/chosen": -2.182403326034546, "logits/rejected": -2.2647366523742676, "logps/chosen": -1.3911839723587036, "logps/rejected": -27.39896011352539, "loss": 0.6503, "rewards/accuracies": 0.0, "rewards/chosen": 0.6458284854888916, "rewards/margins": -0.1677030324935913, "rewards/rejected": 0.8135315179824829, "step": 1649 }, { "epoch": 2.98, "learning_rate": 7.719719298754368e-12, "logits/chosen": -2.182966709136963, "logits/rejected": -2.338798761367798, "logps/chosen": -1.0825026035308838, "logps/rejected": -28.26113510131836, "loss": 0.483, "rewards/accuracies": 1.0, "rewards/chosen": 0.8509578704833984, "rewards/margins": 0.5424970388412476, "rewards/rejected": 0.3084608018398285, "step": 1650 }, { "epoch": 2.99, "learning_rate": 6.099564240164001e-12, "logits/chosen": -2.2261221408843994, "logits/rejected": -2.1948060989379883, "logps/chosen": -1.9047904014587402, "logps/rejected": 0.0, "loss": 0.5445, "rewards/accuracies": 1.0, "rewards/chosen": 0.5809171199798584, "rewards/margins": 0.5809171199798584, "rewards/rejected": 0.0, "step": 1651 }, { "epoch": 2.99, "learning_rate": 4.670001125739453e-12, "logits/chosen": -2.325587272644043, "logits/rejected": -2.32700252532959, "logps/chosen": -1.2697770595550537, "logps/rejected": -1.5561351776123047, "loss": 0.7178, "rewards/accuracies": 1.0, "rewards/chosen": 0.597678005695343, "rewards/margins": 0.11742368340492249, "rewards/rejected": 0.48025432229042053, "step": 1652 }, { "epoch": 2.99, "learning_rate": 3.4310354054101207e-12, "logits/chosen": -2.3176662921905518, "logits/rejected": -2.290489435195923, "logps/chosen": -0.936247706413269, "logps/rejected": 0.0, "loss": 0.5687, "rewards/accuracies": 1.0, "rewards/chosen": 0.5810962915420532, "rewards/margins": 0.5810962915420532, "rewards/rejected": 0.0, "step": 1653 }, { "epoch": 2.99, "learning_rate": 2.3826718024977375e-12, "logits/chosen": -2.3566315174102783, "logits/rejected": -2.3562726974487305, "logps/chosen": -10.44543743133545, "logps/rejected": -1.2026417255401611, "loss": 0.7273, "rewards/accuracies": 0.0, "rewards/chosen": 0.4995482563972473, "rewards/margins": -0.17843759059906006, "rewards/rejected": 0.6779858469963074, "step": 1654 }, { "epoch": 2.99, "learning_rate": 1.5249143136775167e-12, "logits/chosen": -2.196876287460327, "logits/rejected": -2.235673427581787, "logps/chosen": -1.621289610862732, "logps/rejected": -24.272022247314453, "loss": 0.4862, "rewards/accuracies": 1.0, "rewards/chosen": 0.6046828627586365, "rewards/margins": 0.24568530917167664, "rewards/rejected": 0.35899755358695984, "step": 1655 }, { "epoch": 2.99, "learning_rate": 8.577662089837012e-13, "logits/chosen": -2.2633533477783203, "logits/rejected": -2.2442617416381836, "logps/chosen": -4.7551140785217285, "logps/rejected": -3.3309552669525146, "loss": 0.5299, "rewards/accuracies": 1.0, "rewards/chosen": 0.8159656524658203, "rewards/margins": 0.4327843487262726, "rewards/rejected": 0.38318130373954773, "step": 1656 }, { "epoch": 3.0, "learning_rate": 3.812300317818096e-13, "logits/chosen": -2.2891995906829834, "logits/rejected": -2.3559720516204834, "logps/chosen": -1.7546734809875488, "logps/rejected": -20.347156524658203, "loss": 0.7205, "rewards/accuracies": 1.0, "rewards/chosen": 0.557797372341156, "rewards/margins": 0.10950329899787903, "rewards/rejected": 0.448294073343277, "step": 1657 }, { "epoch": 3.0, "learning_rate": 9.530759877973693e-14, "logits/chosen": -2.3838045597076416, "logits/rejected": -2.3816397190093994, "logps/chosen": -1.59548819065094, "logps/rejected": -8.24104118347168, "loss": 0.6118, "rewards/accuracies": 1.0, "rewards/chosen": 0.929692268371582, "rewards/margins": 0.39571481943130493, "rewards/rejected": 0.5339774489402771, "step": 1658 }, { "epoch": 3.0, "learning_rate": 0.0, "logits/chosen": -2.2739269733428955, "logits/rejected": -2.270918607711792, "logps/chosen": -6.678603649139404, "logps/rejected": -4.754262924194336, "loss": 0.4733, "rewards/accuracies": 1.0, "rewards/chosen": 0.9610788226127625, "rewards/margins": 0.5393661260604858, "rewards/rejected": 0.4217126965522766, "step": 1659 }, { "epoch": 3.0, "step": 1659, "total_flos": 0.0, "train_loss": 0.6420376384524138, "train_runtime": 10916.3248, "train_samples_per_second": 0.304, "train_steps_per_second": 0.152 } ], "logging_steps": 1.0, "max_steps": 1659, "num_train_epochs": 3, "save_steps": 200, "total_flos": 0.0, "trial_name": null, "trial_params": null }