{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.998022412656559, "eval_steps": 569, "global_step": 1137, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0026367831245880024, "grad_norm": 42.03252029418945, "learning_rate": 8e-07, "log_odds_chosen": 0.5063995122909546, "log_odds_ratio": -0.5107995867729187, "logits/chosen": -0.22655925154685974, "logits/rejected": -0.2056657373905182, "logps/chosen": -3.680752992630005, "logps/rejected": -4.1695122718811035, "loss": 10.4272, "nll_loss": 2.555725574493408, "rewards/accuracies": 0.875, "rewards/chosen": -0.36807531118392944, "rewards/margins": 0.04887596145272255, "rewards/rejected": -0.4169512689113617, "step": 1 }, { "epoch": 0.005273566249176005, "grad_norm": 41.015140533447266, "learning_rate": 1.6e-06, "log_odds_chosen": 0.1609557569026947, "log_odds_ratio": -0.6170504093170166, "logits/chosen": -0.30842357873916626, "logits/rejected": -0.3016989827156067, "logps/chosen": -4.0163726806640625, "logps/rejected": -4.173500061035156, "loss": 10.9832, "nll_loss": 2.6840877532958984, "rewards/accuracies": 1.0, "rewards/chosen": -0.4016372561454773, "rewards/margins": 0.01571274921298027, "rewards/rejected": -0.41734999418258667, "step": 2 }, { "epoch": 0.007910349373764008, "grad_norm": 39.962039947509766, "learning_rate": 2.4e-06, "log_odds_chosen": 0.5912856459617615, "log_odds_ratio": -0.4688023030757904, "logits/chosen": -0.3613507151603699, "logits/rejected": -0.3267977833747864, "logps/chosen": -4.372102737426758, "logps/rejected": -4.95222282409668, "loss": 10.5225, "nll_loss": 2.583754062652588, "rewards/accuracies": 0.75, "rewards/chosen": -0.4372102618217468, "rewards/margins": 0.0580119863152504, "rewards/rejected": -0.495222270488739, "step": 3 }, { "epoch": 0.01054713249835201, "grad_norm": 40.72674560546875, "learning_rate": 3.2e-06, "log_odds_chosen": 0.4822363257408142, "log_odds_ratio": -0.5072091817855835, "logits/chosen": -0.3044830560684204, "logits/rejected": -0.28497734665870667, "logps/chosen": -3.9995956420898438, "logps/rejected": -4.468729019165039, "loss": 10.2201, "nll_loss": 2.5043091773986816, "rewards/accuracies": 0.875, "rewards/chosen": -0.3999595642089844, "rewards/margins": 0.04691329225897789, "rewards/rejected": -0.44687288999557495, "step": 4 }, { "epoch": 0.013183915622940013, "grad_norm": 37.658546447753906, "learning_rate": 4e-06, "log_odds_chosen": 0.44297361373901367, "log_odds_ratio": -0.5074143409729004, "logits/chosen": -0.3302440047264099, "logits/rejected": -0.3137558102607727, "logps/chosen": -3.6166789531707764, "logps/rejected": -4.0442304611206055, "loss": 9.9725, "nll_loss": 2.4423866271972656, "rewards/accuracies": 0.875, "rewards/chosen": -0.3616679012775421, "rewards/margins": 0.04275514930486679, "rewards/rejected": -0.4044230580329895, "step": 5 }, { "epoch": 0.015820698747528016, "grad_norm": 41.972869873046875, "learning_rate": 4.8e-06, "log_odds_chosen": 0.5972580909729004, "log_odds_ratio": -0.46147802472114563, "logits/chosen": -0.30286896228790283, "logits/rejected": -0.287975013256073, "logps/chosen": -4.17189884185791, "logps/rejected": -4.753922462463379, "loss": 10.2496, "nll_loss": 2.516244888305664, "rewards/accuracies": 0.875, "rewards/chosen": -0.41718989610671997, "rewards/margins": 0.0582023561000824, "rewards/rejected": -0.47539228200912476, "step": 6 }, { "epoch": 0.01845748187211602, "grad_norm": 40.42627716064453, "learning_rate": 5.6e-06, "log_odds_chosen": 0.7452040314674377, "log_odds_ratio": -0.4109686017036438, "logits/chosen": -0.2889333963394165, "logits/rejected": -0.279765248298645, "logps/chosen": -3.668855667114258, "logps/rejected": -4.401315689086914, "loss": 10.3841, "nll_loss": 2.5549163818359375, "rewards/accuracies": 1.0, "rewards/chosen": -0.36688560247421265, "rewards/margins": 0.07324595004320145, "rewards/rejected": -0.4401315450668335, "step": 7 }, { "epoch": 0.02109426499670402, "grad_norm": 45.6840934753418, "learning_rate": 6.4e-06, "log_odds_chosen": 0.6613704562187195, "log_odds_ratio": -0.4452037811279297, "logits/chosen": -0.10419394075870514, "logits/rejected": -0.09568381309509277, "logps/chosen": -4.207592487335205, "logps/rejected": -4.860063552856445, "loss": 10.319, "nll_loss": 2.5352323055267334, "rewards/accuracies": 0.875, "rewards/chosen": -0.42075929045677185, "rewards/margins": 0.06524712592363358, "rewards/rejected": -0.48600637912750244, "step": 8 }, { "epoch": 0.023731048121292023, "grad_norm": 36.51398849487305, "learning_rate": 7.2e-06, "log_odds_chosen": 0.5675552487373352, "log_odds_ratio": -0.47734999656677246, "logits/chosen": -0.3029658794403076, "logits/rejected": -0.2848775386810303, "logps/chosen": -3.1933164596557617, "logps/rejected": -3.740905284881592, "loss": 10.0633, "nll_loss": 2.468083381652832, "rewards/accuracies": 0.875, "rewards/chosen": -0.31933167576789856, "rewards/margins": 0.054758865386247635, "rewards/rejected": -0.3740905225276947, "step": 9 }, { "epoch": 0.026367831245880026, "grad_norm": 40.30665969848633, "learning_rate": 8e-06, "log_odds_chosen": 0.508488118648529, "log_odds_ratio": -0.4933461546897888, "logits/chosen": -0.21680974960327148, "logits/rejected": -0.19147902727127075, "logps/chosen": -3.317249298095703, "logps/rejected": -3.810041904449463, "loss": 9.8161, "nll_loss": 2.4046783447265625, "rewards/accuracies": 1.0, "rewards/chosen": -0.33172494173049927, "rewards/margins": 0.04927929490804672, "rewards/rejected": -0.3810042142868042, "step": 10 }, { "epoch": 0.02900461437046803, "grad_norm": 42.89982604980469, "learning_rate": 7.992901508429459e-06, "log_odds_chosen": 0.6581701040267944, "log_odds_ratio": -0.42420172691345215, "logits/chosen": -0.1814369112253189, "logits/rejected": -0.17391075193881989, "logps/chosen": -3.6342430114746094, "logps/rejected": -4.276468753814697, "loss": 9.8359, "nll_loss": 2.416558265686035, "rewards/accuracies": 1.0, "rewards/chosen": -0.36342427134513855, "rewards/margins": 0.06422261893749237, "rewards/rejected": -0.4276469051837921, "step": 11 }, { "epoch": 0.03164139749505603, "grad_norm": 37.45441818237305, "learning_rate": 7.985803016858917e-06, "log_odds_chosen": 0.5965335369110107, "log_odds_ratio": -0.4536648690700531, "logits/chosen": -0.27103012800216675, "logits/rejected": -0.2577633261680603, "logps/chosen": -3.327211856842041, "logps/rejected": -3.900315046310425, "loss": 9.3915, "nll_loss": 2.3025002479553223, "rewards/accuracies": 1.0, "rewards/chosen": -0.33272117376327515, "rewards/margins": 0.057310331612825394, "rewards/rejected": -0.39003151655197144, "step": 12 }, { "epoch": 0.034278180619644036, "grad_norm": 40.52745819091797, "learning_rate": 7.978704525288376e-06, "log_odds_chosen": 0.32027924060821533, "log_odds_ratio": -0.5618823170661926, "logits/chosen": -0.26642847061157227, "logits/rejected": -0.21151301264762878, "logps/chosen": -4.011443138122559, "logps/rejected": -4.31336784362793, "loss": 9.2044, "nll_loss": 2.244905471801758, "rewards/accuracies": 0.75, "rewards/chosen": -0.4011443257331848, "rewards/margins": 0.03019241988658905, "rewards/rejected": -0.43133679032325745, "step": 13 }, { "epoch": 0.03691496374423204, "grad_norm": 34.13896179199219, "learning_rate": 7.971606033717835e-06, "log_odds_chosen": 0.8332005739212036, "log_odds_ratio": -0.36455652117729187, "logits/chosen": -0.34497541189193726, "logits/rejected": -0.3069722056388855, "logps/chosen": -2.7453174591064453, "logps/rejected": -3.534782886505127, "loss": 8.7255, "nll_loss": 2.144930601119995, "rewards/accuracies": 1.0, "rewards/chosen": -0.274531751871109, "rewards/margins": 0.0789465457201004, "rewards/rejected": -0.3534783124923706, "step": 14 }, { "epoch": 0.03955174686882004, "grad_norm": 39.49089431762695, "learning_rate": 7.964507542147292e-06, "log_odds_chosen": 0.5139614343643188, "log_odds_ratio": -0.4868500828742981, "logits/chosen": -0.2310849130153656, "logits/rejected": -0.18353192508220673, "logps/chosen": -3.5987987518310547, "logps/rejected": -4.098852157592773, "loss": 9.3649, "nll_loss": 2.292545795440674, "rewards/accuracies": 0.875, "rewards/chosen": -0.35987988114356995, "rewards/margins": 0.05000537261366844, "rewards/rejected": -0.4098852574825287, "step": 15 }, { "epoch": 0.04218852999340804, "grad_norm": 36.65208435058594, "learning_rate": 7.957409050576753e-06, "log_odds_chosen": 0.5320571064949036, "log_odds_ratio": -0.4787905514240265, "logits/chosen": -0.2220565676689148, "logits/rejected": -0.17649269104003906, "logps/chosen": -3.046041250228882, "logps/rejected": -3.5599379539489746, "loss": 8.9619, "nll_loss": 2.192599058151245, "rewards/accuracies": 1.0, "rewards/chosen": -0.30460411310195923, "rewards/margins": 0.051389675587415695, "rewards/rejected": -0.3559938073158264, "step": 16 }, { "epoch": 0.04482531311799604, "grad_norm": 37.12849807739258, "learning_rate": 7.95031055900621e-06, "log_odds_chosen": 0.4501737654209137, "log_odds_ratio": -0.5116550326347351, "logits/chosen": -0.2232360541820526, "logits/rejected": -0.19859637320041656, "logps/chosen": -2.8962063789367676, "logps/rejected": -3.327422857284546, "loss": 8.5734, "nll_loss": 2.0921807289123535, "rewards/accuracies": 0.875, "rewards/chosen": -0.28962063789367676, "rewards/margins": 0.04312165826559067, "rewards/rejected": -0.332742303609848, "step": 17 }, { "epoch": 0.047462096242584045, "grad_norm": 41.37774658203125, "learning_rate": 7.943212067435669e-06, "log_odds_chosen": 0.6656603813171387, "log_odds_ratio": -0.44296392798423767, "logits/chosen": -0.0800028070807457, "logits/rejected": -0.06333664804697037, "logps/chosen": -3.446854829788208, "logps/rejected": -4.09830379486084, "loss": 8.5558, "nll_loss": 2.0946435928344727, "rewards/accuracies": 0.75, "rewards/chosen": -0.34468552470207214, "rewards/margins": 0.06514490395784378, "rewards/rejected": -0.40983039140701294, "step": 18 }, { "epoch": 0.05009887936717205, "grad_norm": 40.35208511352539, "learning_rate": 7.936113575865128e-06, "log_odds_chosen": 0.5608517527580261, "log_odds_ratio": -0.47481366991996765, "logits/chosen": -0.14373314380645752, "logits/rejected": -0.12235800921916962, "logps/chosen": -3.744997978210449, "logps/rejected": -4.289097309112549, "loss": 8.5693, "nll_loss": 2.0948424339294434, "rewards/accuracies": 0.875, "rewards/chosen": -0.3744997978210449, "rewards/margins": 0.05440989136695862, "rewards/rejected": -0.4289097189903259, "step": 19 }, { "epoch": 0.05273566249176005, "grad_norm": 35.29018783569336, "learning_rate": 7.929015084294587e-06, "log_odds_chosen": 0.399916410446167, "log_odds_ratio": -0.52656090259552, "logits/chosen": -0.2085961252450943, "logits/rejected": -0.178694948554039, "logps/chosen": -3.017723560333252, "logps/rejected": -3.3984627723693848, "loss": 8.2289, "nll_loss": 2.004572868347168, "rewards/accuracies": 0.875, "rewards/chosen": -0.3017723560333252, "rewards/margins": 0.038073912262916565, "rewards/rejected": -0.33984625339508057, "step": 20 }, { "epoch": 0.055372445616348055, "grad_norm": 33.76988220214844, "learning_rate": 7.921916592724046e-06, "log_odds_chosen": 0.5948774814605713, "log_odds_ratio": -0.4468332827091217, "logits/chosen": -0.23783668875694275, "logits/rejected": -0.13789881765842438, "logps/chosen": -2.8810017108917236, "logps/rejected": -3.4495158195495605, "loss": 8.3768, "nll_loss": 2.0495107173919678, "rewards/accuracies": 1.0, "rewards/chosen": -0.2881001830101013, "rewards/margins": 0.05685139447450638, "rewards/rejected": -0.3449515700340271, "step": 21 }, { "epoch": 0.05800922874093606, "grad_norm": 36.270206451416016, "learning_rate": 7.914818101153505e-06, "log_odds_chosen": 0.4766322374343872, "log_odds_ratio": -0.4933544993400574, "logits/chosen": -0.12822745740413666, "logits/rejected": -0.10519365966320038, "logps/chosen": -3.1237192153930664, "logps/rejected": -3.577584743499756, "loss": 8.0839, "nll_loss": 1.9716460704803467, "rewards/accuracies": 1.0, "rewards/chosen": -0.31237196922302246, "rewards/margins": 0.04538654536008835, "rewards/rejected": -0.35775846242904663, "step": 22 }, { "epoch": 0.06064601186552406, "grad_norm": 36.74298095703125, "learning_rate": 7.907719609582964e-06, "log_odds_chosen": 0.6195639371871948, "log_odds_ratio": -0.440764844417572, "logits/chosen": -0.16736823320388794, "logits/rejected": -0.14166246354579926, "logps/chosen": -3.133723497390747, "logps/rejected": -3.726114273071289, "loss": 7.7208, "nll_loss": 1.8861268758773804, "rewards/accuracies": 1.0, "rewards/chosen": -0.31337234377861023, "rewards/margins": 0.05923907458782196, "rewards/rejected": -0.3726114332675934, "step": 23 }, { "epoch": 0.06328279499011207, "grad_norm": 31.225141525268555, "learning_rate": 7.900621118012423e-06, "log_odds_chosen": 0.2814294099807739, "log_odds_ratio": -0.568894624710083, "logits/chosen": -0.27078360319137573, "logits/rejected": -0.26242709159851074, "logps/chosen": -2.6045753955841064, "logps/rejected": -2.865044593811035, "loss": 7.9579, "nll_loss": 1.9325947761535645, "rewards/accuracies": 1.0, "rewards/chosen": -0.2604575455188751, "rewards/margins": 0.026046905666589737, "rewards/rejected": -0.28650444746017456, "step": 24 }, { "epoch": 0.06591957811470006, "grad_norm": 35.55276870727539, "learning_rate": 7.893522626441881e-06, "log_odds_chosen": 0.5024213790893555, "log_odds_ratio": -0.4903920292854309, "logits/chosen": -0.20812958478927612, "logits/rejected": -0.17434272170066833, "logps/chosen": -3.215907335281372, "logps/rejected": -3.6927273273468018, "loss": 7.617, "nll_loss": 1.8552184104919434, "rewards/accuracies": 0.75, "rewards/chosen": -0.32159075140953064, "rewards/margins": 0.047681987285614014, "rewards/rejected": -0.36927270889282227, "step": 25 }, { "epoch": 0.06855636123928807, "grad_norm": 38.64487838745117, "learning_rate": 7.886424134871339e-06, "log_odds_chosen": 0.4476067125797272, "log_odds_ratio": -0.51563960313797, "logits/chosen": -0.0307003203779459, "logits/rejected": -0.011324996128678322, "logps/chosen": -3.4396426677703857, "logps/rejected": -3.8734965324401855, "loss": 7.9103, "nll_loss": 1.926013469696045, "rewards/accuracies": 1.0, "rewards/chosen": -0.34396427869796753, "rewards/margins": 0.043385379016399384, "rewards/rejected": -0.3873496651649475, "step": 26 }, { "epoch": 0.07119314436387607, "grad_norm": 31.943300247192383, "learning_rate": 7.8793256433008e-06, "log_odds_chosen": 0.36695021390914917, "log_odds_ratio": -0.5485100746154785, "logits/chosen": -0.3074036240577698, "logits/rejected": -0.25627627968788147, "logps/chosen": -2.7426533699035645, "logps/rejected": -3.0948386192321777, "loss": 7.64, "nll_loss": 1.855139970779419, "rewards/accuracies": 0.75, "rewards/chosen": -0.2742653489112854, "rewards/margins": 0.0352184996008873, "rewards/rejected": -0.3094838261604309, "step": 27 }, { "epoch": 0.07382992748846408, "grad_norm": 32.09175491333008, "learning_rate": 7.872227151730256e-06, "log_odds_chosen": 0.6224421262741089, "log_odds_ratio": -0.43833571672439575, "logits/chosen": -0.2338484525680542, "logits/rejected": -0.19638372957706451, "logps/chosen": -2.5678858757019043, "logps/rejected": -3.1542539596557617, "loss": 7.3637, "nll_loss": 1.7970834970474243, "rewards/accuracies": 1.0, "rewards/chosen": -0.25678858160972595, "rewards/margins": 0.05863682180643082, "rewards/rejected": -0.31542539596557617, "step": 28 }, { "epoch": 0.07646671061305207, "grad_norm": 35.37105941772461, "learning_rate": 7.865128660159715e-06, "log_odds_chosen": 0.4396553933620453, "log_odds_ratio": -0.5348554849624634, "logits/chosen": -0.12305408716201782, "logits/rejected": -0.096021369099617, "logps/chosen": -2.859783411026001, "logps/rejected": -3.2838082313537598, "loss": 6.9916, "nll_loss": 1.6944115161895752, "rewards/accuracies": 0.875, "rewards/chosen": -0.2859783172607422, "rewards/margins": 0.042402494698762894, "rewards/rejected": -0.328380823135376, "step": 29 }, { "epoch": 0.07910349373764008, "grad_norm": 33.27079772949219, "learning_rate": 7.858030168589174e-06, "log_odds_chosen": 0.4660126864910126, "log_odds_ratio": -0.519025981426239, "logits/chosen": -0.2649393379688263, "logits/rejected": -0.227127343416214, "logps/chosen": -2.5917606353759766, "logps/rejected": -3.015207052230835, "loss": 6.691, "nll_loss": 1.6208468675613403, "rewards/accuracies": 0.875, "rewards/chosen": -0.2591760754585266, "rewards/margins": 0.04234464839100838, "rewards/rejected": -0.3015207052230835, "step": 30 }, { "epoch": 0.08174027686222808, "grad_norm": 32.39740753173828, "learning_rate": 7.850931677018633e-06, "log_odds_chosen": 0.20572400093078613, "log_odds_ratio": -0.6036237478256226, "logits/chosen": -0.21612390875816345, "logits/rejected": -0.195010244846344, "logps/chosen": -2.8395891189575195, "logps/rejected": -3.036059856414795, "loss": 7.402, "nll_loss": 1.7901456356048584, "rewards/accuracies": 0.875, "rewards/chosen": -0.28395891189575195, "rewards/margins": 0.01964707300066948, "rewards/rejected": -0.3036060035228729, "step": 31 }, { "epoch": 0.08437705998681608, "grad_norm": 31.24039649963379, "learning_rate": 7.843833185448092e-06, "log_odds_chosen": 0.22066299617290497, "log_odds_ratio": -0.5980744957923889, "logits/chosen": -0.25658881664276123, "logits/rejected": -0.2568542957305908, "logps/chosen": -2.5169811248779297, "logps/rejected": -2.7204983234405518, "loss": 7.0351, "nll_loss": 1.6989648342132568, "rewards/accuracies": 0.75, "rewards/chosen": -0.2516981363296509, "rewards/margins": 0.0203517135232687, "rewards/rejected": -0.27204984426498413, "step": 32 }, { "epoch": 0.08701384311140409, "grad_norm": 31.878759384155273, "learning_rate": 7.836734693877551e-06, "log_odds_chosen": 0.6456508636474609, "log_odds_ratio": -0.43579402565956116, "logits/chosen": -0.23061901330947876, "logits/rejected": -0.19082020223140717, "logps/chosen": -2.4211320877075195, "logps/rejected": -3.021599292755127, "loss": 6.5759, "nll_loss": 1.6003878116607666, "rewards/accuracies": 1.0, "rewards/chosen": -0.24211320281028748, "rewards/margins": 0.060046710073947906, "rewards/rejected": -0.3021599054336548, "step": 33 }, { "epoch": 0.08965062623599208, "grad_norm": 28.477542877197266, "learning_rate": 7.82963620230701e-06, "log_odds_chosen": 0.33213815093040466, "log_odds_ratio": -0.5480948090553284, "logits/chosen": -0.3933866322040558, "logits/rejected": -0.337604820728302, "logps/chosen": -2.40647029876709, "logps/rejected": -2.713749408721924, "loss": 6.5337, "nll_loss": 1.5786075592041016, "rewards/accuracies": 0.75, "rewards/chosen": -0.2406470775604248, "rewards/margins": 0.030727902427315712, "rewards/rejected": -0.2713749408721924, "step": 34 }, { "epoch": 0.0922874093605801, "grad_norm": 31.25120735168457, "learning_rate": 7.822537710736469e-06, "log_odds_chosen": 0.4866493344306946, "log_odds_ratio": -0.49146533012390137, "logits/chosen": -0.24240362644195557, "logits/rejected": -0.19784504175186157, "logps/chosen": -2.4882521629333496, "logps/rejected": -2.9402499198913574, "loss": 6.5205, "nll_loss": 1.5809718370437622, "rewards/accuracies": 1.0, "rewards/chosen": -0.24882523715496063, "rewards/margins": 0.04519975930452347, "rewards/rejected": -0.2940249741077423, "step": 35 }, { "epoch": 0.09492419248516809, "grad_norm": 30.688030242919922, "learning_rate": 7.815439219165928e-06, "log_odds_chosen": 0.5005679726600647, "log_odds_ratio": -0.47987300157546997, "logits/chosen": -0.21071836352348328, "logits/rejected": -0.17459529638290405, "logps/chosen": -2.2092747688293457, "logps/rejected": -2.6628780364990234, "loss": 5.9388, "nll_loss": 1.4367212057113647, "rewards/accuracies": 1.0, "rewards/chosen": -0.22092747688293457, "rewards/margins": 0.04536033049225807, "rewards/rejected": -0.26628780364990234, "step": 36 }, { "epoch": 0.0975609756097561, "grad_norm": 29.692096710205078, "learning_rate": 7.808340727595385e-06, "log_odds_chosen": 0.42486512660980225, "log_odds_ratio": -0.5341512560844421, "logits/chosen": -0.25842246413230896, "logits/rejected": -0.2083197683095932, "logps/chosen": -2.398265838623047, "logps/rejected": -2.7930338382720947, "loss": 6.3507, "nll_loss": 1.5342501401901245, "rewards/accuracies": 0.625, "rewards/chosen": -0.23982658982276917, "rewards/margins": 0.03947678953409195, "rewards/rejected": -0.2793033719062805, "step": 37 }, { "epoch": 0.1001977587343441, "grad_norm": 30.48540496826172, "learning_rate": 7.801242236024844e-06, "log_odds_chosen": 0.4453636705875397, "log_odds_ratio": -0.5074580907821655, "logits/chosen": -0.21573767066001892, "logits/rejected": -0.18589571118354797, "logps/chosen": -2.112009048461914, "logps/rejected": -2.5102884769439697, "loss": 5.7095, "nll_loss": 1.3766206502914429, "rewards/accuracies": 1.0, "rewards/chosen": -0.21120090782642365, "rewards/margins": 0.039827942848205566, "rewards/rejected": -0.251028835773468, "step": 38 }, { "epoch": 0.10283454185893211, "grad_norm": 30.601762771606445, "learning_rate": 7.794143744454303e-06, "log_odds_chosen": 0.42712199687957764, "log_odds_ratio": -0.5272745490074158, "logits/chosen": -0.21021857857704163, "logits/rejected": -0.15227194130420685, "logps/chosen": -2.536367416381836, "logps/rejected": -2.927422046661377, "loss": 6.0723, "nll_loss": 1.465356469154358, "rewards/accuracies": 0.875, "rewards/chosen": -0.2536367177963257, "rewards/margins": 0.03910548612475395, "rewards/rejected": -0.29274219274520874, "step": 39 }, { "epoch": 0.1054713249835201, "grad_norm": 30.587663650512695, "learning_rate": 7.787045252883762e-06, "log_odds_chosen": 0.3872503340244293, "log_odds_ratio": -0.540022075176239, "logits/chosen": -0.20031091570854187, "logits/rejected": -0.16761323809623718, "logps/chosen": -2.5356223583221436, "logps/rejected": -2.9007105827331543, "loss": 6.0481, "nll_loss": 1.4580141305923462, "rewards/accuracies": 0.75, "rewards/chosen": -0.25356224179267883, "rewards/margins": 0.03650882840156555, "rewards/rejected": -0.2900710701942444, "step": 40 }, { "epoch": 0.10810810810810811, "grad_norm": 28.171480178833008, "learning_rate": 7.77994676131322e-06, "log_odds_chosen": 0.5199911594390869, "log_odds_ratio": -0.4783152937889099, "logits/chosen": -0.29706722497940063, "logits/rejected": -0.26550137996673584, "logps/chosen": -2.006732940673828, "logps/rejected": -2.4726297855377197, "loss": 5.4301, "nll_loss": 1.3096997737884521, "rewards/accuracies": 1.0, "rewards/chosen": -0.20067329704761505, "rewards/margins": 0.0465896800160408, "rewards/rejected": -0.24726298451423645, "step": 41 }, { "epoch": 0.11074489123269611, "grad_norm": 31.35000228881836, "learning_rate": 7.77284826974268e-06, "log_odds_chosen": 0.34326064586639404, "log_odds_ratio": -0.55986088514328, "logits/chosen": -0.12557223439216614, "logits/rejected": -0.1102723777294159, "logps/chosen": -2.5316214561462402, "logps/rejected": -2.8401966094970703, "loss": 5.5317, "nll_loss": 1.3269493579864502, "rewards/accuracies": 0.625, "rewards/chosen": -0.253162145614624, "rewards/margins": 0.030857522040605545, "rewards/rejected": -0.28401967883110046, "step": 42 }, { "epoch": 0.1133816743572841, "grad_norm": 28.0591983795166, "learning_rate": 7.765749778172138e-06, "log_odds_chosen": 0.31989234685897827, "log_odds_ratio": -0.5515447854995728, "logits/chosen": -0.2060682773590088, "logits/rejected": -0.17958971858024597, "logps/chosen": -2.2945690155029297, "logps/rejected": -2.5878024101257324, "loss": 5.7537, "nll_loss": 1.383264422416687, "rewards/accuracies": 1.0, "rewards/chosen": -0.22945690155029297, "rewards/margins": 0.02932334691286087, "rewards/rejected": -0.25878024101257324, "step": 43 }, { "epoch": 0.11601845748187212, "grad_norm": 29.306196212768555, "learning_rate": 7.758651286601597e-06, "log_odds_chosen": 0.22514420747756958, "log_odds_ratio": -0.5939244031906128, "logits/chosen": -0.14782042801380157, "logits/rejected": -0.13932658731937408, "logps/chosen": -2.3671927452087402, "logps/rejected": -2.5678796768188477, "loss": 5.5461, "nll_loss": 1.3271353244781494, "rewards/accuracies": 0.75, "rewards/chosen": -0.2367192804813385, "rewards/margins": 0.020068688318133354, "rewards/rejected": -0.2567879557609558, "step": 44 }, { "epoch": 0.11865524060646011, "grad_norm": 27.434146881103516, "learning_rate": 7.751552795031056e-06, "log_odds_chosen": 0.39294224977493286, "log_odds_ratio": -0.5219178199768066, "logits/chosen": -0.31487947702407837, "logits/rejected": -0.2837178707122803, "logps/chosen": -2.429870128631592, "logps/rejected": -2.7943930625915527, "loss": 5.7072, "nll_loss": 1.3745999336242676, "rewards/accuracies": 1.0, "rewards/chosen": -0.2429870069026947, "rewards/margins": 0.03645233064889908, "rewards/rejected": -0.2794393301010132, "step": 45 }, { "epoch": 0.12129202373104812, "grad_norm": 26.80518913269043, "learning_rate": 7.744454303460515e-06, "log_odds_chosen": 0.27651965618133545, "log_odds_ratio": -0.5777378082275391, "logits/chosen": -0.2640863358974457, "logits/rejected": -0.24117253720760345, "logps/chosen": -2.340364456176758, "logps/rejected": -2.596097469329834, "loss": 5.7167, "nll_loss": 1.371401071548462, "rewards/accuracies": 0.875, "rewards/chosen": -0.23403644561767578, "rewards/margins": 0.02557331509888172, "rewards/rejected": -0.25960975885391235, "step": 46 }, { "epoch": 0.12392880685563612, "grad_norm": 24.878803253173828, "learning_rate": 7.737355811889972e-06, "log_odds_chosen": 0.23489218950271606, "log_odds_ratio": -0.590002179145813, "logits/chosen": -0.3505256772041321, "logits/rejected": -0.3077712953090668, "logps/chosen": -2.2546756267547607, "logps/rejected": -2.4655075073242188, "loss": 5.5538, "nll_loss": 1.3294399976730347, "rewards/accuracies": 0.75, "rewards/chosen": -0.22546756267547607, "rewards/margins": 0.021083198487758636, "rewards/rejected": -0.2465507686138153, "step": 47 }, { "epoch": 0.12656558998022413, "grad_norm": 26.178964614868164, "learning_rate": 7.730257320319431e-06, "log_odds_chosen": 0.21647870540618896, "log_odds_ratio": -0.5960606336593628, "logits/chosen": -0.2653028964996338, "logits/rejected": -0.22831164300441742, "logps/chosen": -2.2759790420532227, "logps/rejected": -2.4663686752319336, "loss": 5.1854, "nll_loss": 1.2367491722106934, "rewards/accuracies": 0.875, "rewards/chosen": -0.2275979220867157, "rewards/margins": 0.019038967788219452, "rewards/rejected": -0.24663689732551575, "step": 48 }, { "epoch": 0.12920237310481214, "grad_norm": 22.515853881835938, "learning_rate": 7.72315882874889e-06, "log_odds_chosen": 0.4059191346168518, "log_odds_ratio": -0.5174556374549866, "logits/chosen": -0.4336870610713959, "logits/rejected": -0.3817170560359955, "logps/chosen": -1.9080209732055664, "logps/rejected": -2.260662794113159, "loss": 5.2487, "nll_loss": 1.2604243755340576, "rewards/accuracies": 1.0, "rewards/chosen": -0.19080209732055664, "rewards/margins": 0.035264186561107635, "rewards/rejected": -0.22606629133224487, "step": 49 }, { "epoch": 0.13183915622940012, "grad_norm": 22.30443572998047, "learning_rate": 7.716060337178349e-06, "log_odds_chosen": 0.35167044401168823, "log_odds_ratio": -0.5408763289451599, "logits/chosen": -0.3750547766685486, "logits/rejected": -0.35194963216781616, "logps/chosen": -2.000838279724121, "logps/rejected": -2.3132987022399902, "loss": 5.5558, "nll_loss": 1.3348667621612549, "rewards/accuracies": 0.875, "rewards/chosen": -0.20008382201194763, "rewards/margins": 0.03124604932963848, "rewards/rejected": -0.23132985830307007, "step": 50 }, { "epoch": 0.13447593935398813, "grad_norm": 25.707345962524414, "learning_rate": 7.708961845607808e-06, "log_odds_chosen": 0.5400259494781494, "log_odds_ratio": -0.5052967667579651, "logits/chosen": -0.23230978846549988, "logits/rejected": -0.21948783099651337, "logps/chosen": -2.189897298812866, "logps/rejected": -2.696154832839966, "loss": 4.6871, "nll_loss": 1.1212360858917236, "rewards/accuracies": 0.875, "rewards/chosen": -0.21898972988128662, "rewards/margins": 0.05062573403120041, "rewards/rejected": -0.2696154713630676, "step": 51 }, { "epoch": 0.13711272247857614, "grad_norm": 24.343521118164062, "learning_rate": 7.701863354037267e-06, "log_odds_chosen": 0.1825106143951416, "log_odds_ratio": -0.6162492036819458, "logits/chosen": -0.2806670367717743, "logits/rejected": -0.20971013605594635, "logps/chosen": -2.2124099731445312, "logps/rejected": -2.375056505203247, "loss": 5.2171, "nll_loss": 1.2426531314849854, "rewards/accuracies": 0.625, "rewards/chosen": -0.22124099731445312, "rewards/margins": 0.016264665871858597, "rewards/rejected": -0.23750565946102142, "step": 52 }, { "epoch": 0.13974950560316415, "grad_norm": 23.199983596801758, "learning_rate": 7.694764862466726e-06, "log_odds_chosen": 0.4220227897167206, "log_odds_ratio": -0.50751793384552, "logits/chosen": -0.3427724838256836, "logits/rejected": -0.2741890251636505, "logps/chosen": -2.321899890899658, "logps/rejected": -2.708897352218628, "loss": 5.2747, "nll_loss": 1.2679214477539062, "rewards/accuracies": 1.0, "rewards/chosen": -0.23218998312950134, "rewards/margins": 0.038699738681316376, "rewards/rejected": -0.2708897292613983, "step": 53 }, { "epoch": 0.14238628872775214, "grad_norm": 23.460235595703125, "learning_rate": 7.687666370896184e-06, "log_odds_chosen": 0.3479459285736084, "log_odds_ratio": -0.558416485786438, "logits/chosen": -0.334983229637146, "logits/rejected": -0.3043859899044037, "logps/chosen": -2.2959542274475098, "logps/rejected": -2.6057186126708984, "loss": 4.7528, "nll_loss": 1.1323628425598145, "rewards/accuracies": 0.875, "rewards/chosen": -0.22959542274475098, "rewards/margins": 0.03097643330693245, "rewards/rejected": -0.26057183742523193, "step": 54 }, { "epoch": 0.14502307185234015, "grad_norm": 21.800100326538086, "learning_rate": 7.680567879325643e-06, "log_odds_chosen": 0.5213094353675842, "log_odds_ratio": -0.47090500593185425, "logits/chosen": -0.38391953706741333, "logits/rejected": -0.34351029992103577, "logps/chosen": -1.900581955909729, "logps/rejected": -2.3608274459838867, "loss": 4.8139, "nll_loss": 1.1563955545425415, "rewards/accuracies": 1.0, "rewards/chosen": -0.19005818665027618, "rewards/margins": 0.046024568378925323, "rewards/rejected": -0.2360827624797821, "step": 55 }, { "epoch": 0.14765985497692816, "grad_norm": 22.829553604125977, "learning_rate": 7.6734693877551e-06, "log_odds_chosen": 0.3803178668022156, "log_odds_ratio": -0.5260686874389648, "logits/chosen": -0.317103773355484, "logits/rejected": -0.2755615711212158, "logps/chosen": -2.044250249862671, "logps/rejected": -2.3848509788513184, "loss": 4.6378, "nll_loss": 1.106835961341858, "rewards/accuracies": 1.0, "rewards/chosen": -0.20442502200603485, "rewards/margins": 0.03406006097793579, "rewards/rejected": -0.23848509788513184, "step": 56 }, { "epoch": 0.15029663810151614, "grad_norm": 22.079294204711914, "learning_rate": 7.666370896184561e-06, "log_odds_chosen": 0.39094048738479614, "log_odds_ratio": -0.5204463005065918, "logits/chosen": -0.3188185691833496, "logits/rejected": -0.2769733965396881, "logps/chosen": -1.9722585678100586, "logps/rejected": -2.312635660171509, "loss": 4.45, "nll_loss": 1.0604636669158936, "rewards/accuracies": 1.0, "rewards/chosen": -0.19722585380077362, "rewards/margins": 0.03403770551085472, "rewards/rejected": -0.23126356303691864, "step": 57 }, { "epoch": 0.15293342122610415, "grad_norm": 22.725160598754883, "learning_rate": 7.659272404614018e-06, "log_odds_chosen": 0.36315977573394775, "log_odds_ratio": -0.5431260466575623, "logits/chosen": -0.35522258281707764, "logits/rejected": -0.3284539580345154, "logps/chosen": -2.1801340579986572, "logps/rejected": -2.5099594593048096, "loss": 4.5113, "nll_loss": 1.0735070705413818, "rewards/accuracies": 0.875, "rewards/chosen": -0.21801342070102692, "rewards/margins": 0.03298252820968628, "rewards/rejected": -0.2509959638118744, "step": 58 }, { "epoch": 0.15557020435069216, "grad_norm": 20.704824447631836, "learning_rate": 7.652173913043479e-06, "log_odds_chosen": 0.30395179986953735, "log_odds_ratio": -0.5613071918487549, "logits/chosen": -0.40550240874290466, "logits/rejected": -0.36863192915916443, "logps/chosen": -2.019667625427246, "logps/rejected": -2.285858154296875, "loss": 4.3846, "nll_loss": 1.0400309562683105, "rewards/accuracies": 0.875, "rewards/chosen": -0.2019667625427246, "rewards/margins": 0.02661903388798237, "rewards/rejected": -0.22858580946922302, "step": 59 }, { "epoch": 0.15820698747528017, "grad_norm": 20.56707000732422, "learning_rate": 7.645075421472936e-06, "log_odds_chosen": 0.1940009742975235, "log_odds_ratio": -0.6048257946968079, "logits/chosen": -0.4033902585506439, "logits/rejected": -0.3881780207157135, "logps/chosen": -2.059615135192871, "logps/rejected": -2.232116460800171, "loss": 4.3671, "nll_loss": 1.0312800407409668, "rewards/accuracies": 0.875, "rewards/chosen": -0.20596152544021606, "rewards/margins": 0.017250144854187965, "rewards/rejected": -0.22321167588233948, "step": 60 }, { "epoch": 0.16084377059986815, "grad_norm": 19.30805015563965, "learning_rate": 7.637976929902395e-06, "log_odds_chosen": 0.286902517080307, "log_odds_ratio": -0.5759099721908569, "logits/chosen": -0.51356440782547, "logits/rejected": -0.4340498149394989, "logps/chosen": -2.036076307296753, "logps/rejected": -2.2877118587493896, "loss": 4.7367, "nll_loss": 1.1265827417373657, "rewards/accuracies": 0.875, "rewards/chosen": -0.20360763370990753, "rewards/margins": 0.0251635629683733, "rewards/rejected": -0.22877119481563568, "step": 61 }, { "epoch": 0.16348055372445616, "grad_norm": 18.822423934936523, "learning_rate": 7.630878438331854e-06, "log_odds_chosen": 0.1922874003648758, "log_odds_ratio": -0.615166962146759, "logits/chosen": -0.48963281512260437, "logits/rejected": -0.4330398440361023, "logps/chosen": -1.8455352783203125, "logps/rejected": -2.005401134490967, "loss": 4.0266, "nll_loss": 0.9451347589492798, "rewards/accuracies": 0.625, "rewards/chosen": -0.18455354869365692, "rewards/margins": 0.015986589714884758, "rewards/rejected": -0.20054014027118683, "step": 62 }, { "epoch": 0.16611733684904417, "grad_norm": 19.095998764038086, "learning_rate": 7.623779946761314e-06, "log_odds_chosen": 0.21310049295425415, "log_odds_ratio": -0.598873496055603, "logits/chosen": -0.537045955657959, "logits/rejected": -0.4830659329891205, "logps/chosen": -1.9617701768875122, "logps/rejected": -2.1539487838745117, "loss": 4.0892, "nll_loss": 0.9624119997024536, "rewards/accuracies": 0.75, "rewards/chosen": -0.19617702066898346, "rewards/margins": 0.019217852503061295, "rewards/rejected": -0.21539486944675446, "step": 63 }, { "epoch": 0.16875411997363216, "grad_norm": 18.428142547607422, "learning_rate": 7.616681455190772e-06, "log_odds_chosen": 0.41756024956703186, "log_odds_ratio": -0.523665189743042, "logits/chosen": -0.5694464445114136, "logits/rejected": -0.5305702686309814, "logps/chosen": -1.8997293710708618, "logps/rejected": -2.250817060470581, "loss": 4.0725, "nll_loss": 0.9657527208328247, "rewards/accuracies": 0.875, "rewards/chosen": -0.18997293710708618, "rewards/margins": 0.035108763724565506, "rewards/rejected": -0.22508171200752258, "step": 64 }, { "epoch": 0.17139090309822017, "grad_norm": 20.251195907592773, "learning_rate": 7.60958296362023e-06, "log_odds_chosen": 0.28324076533317566, "log_odds_ratio": -0.5701109170913696, "logits/chosen": -0.4854946434497833, "logits/rejected": -0.4697068929672241, "logps/chosen": -2.150897264480591, "logps/rejected": -2.4024617671966553, "loss": 3.3262, "nll_loss": 0.7745460271835327, "rewards/accuracies": 0.75, "rewards/chosen": -0.21508973836898804, "rewards/margins": 0.025156456977128983, "rewards/rejected": -0.24024619162082672, "step": 65 }, { "epoch": 0.17402768622280818, "grad_norm": 16.271448135375977, "learning_rate": 7.6024844720496895e-06, "log_odds_chosen": 0.35990890860557556, "log_odds_ratio": -0.5400576591491699, "logits/chosen": -0.7607088088989258, "logits/rejected": -0.7190062403678894, "logps/chosen": -1.8075987100601196, "logps/rejected": -2.117114543914795, "loss": 4.0742, "nll_loss": 0.9645355343818665, "rewards/accuracies": 0.875, "rewards/chosen": -0.18075986206531525, "rewards/margins": 0.030951591208577156, "rewards/rejected": -0.21171148121356964, "step": 66 }, { "epoch": 0.17666446934739619, "grad_norm": 17.613418579101562, "learning_rate": 7.595385980479148e-06, "log_odds_chosen": 0.33518755435943604, "log_odds_ratio": -0.5439249277114868, "logits/chosen": -0.6029800176620483, "logits/rejected": -0.5759132504463196, "logps/chosen": -2.030747175216675, "logps/rejected": -2.32686185836792, "loss": 3.4952, "nll_loss": 0.8194155693054199, "rewards/accuracies": 1.0, "rewards/chosen": -0.20307472348213196, "rewards/margins": 0.029611455276608467, "rewards/rejected": -0.23268617689609528, "step": 67 }, { "epoch": 0.17930125247198417, "grad_norm": 16.98110580444336, "learning_rate": 7.5882874889086065e-06, "log_odds_chosen": 0.3332338333129883, "log_odds_ratio": -0.5504011511802673, "logits/chosen": -0.6441569924354553, "logits/rejected": -0.6330841779708862, "logps/chosen": -1.8422008752822876, "logps/rejected": -2.1244189739227295, "loss": 3.2622, "nll_loss": 0.760502815246582, "rewards/accuracies": 1.0, "rewards/chosen": -0.1842200756072998, "rewards/margins": 0.02822180651128292, "rewards/rejected": -0.21244189143180847, "step": 68 }, { "epoch": 0.18193803559657218, "grad_norm": 15.15079116821289, "learning_rate": 7.581188997338065e-06, "log_odds_chosen": 0.4982764720916748, "log_odds_ratio": -0.4950292706489563, "logits/chosen": -0.7367129921913147, "logits/rejected": -0.6525557041168213, "logps/chosen": -1.8149136304855347, "logps/rejected": -2.24688982963562, "loss": 3.6231, "nll_loss": 0.856282651424408, "rewards/accuracies": 0.875, "rewards/chosen": -0.18149137496948242, "rewards/margins": 0.04319760575890541, "rewards/rejected": -0.22468897700309753, "step": 69 }, { "epoch": 0.1845748187211602, "grad_norm": 15.011249542236328, "learning_rate": 7.574090505767524e-06, "log_odds_chosen": 0.3177182972431183, "log_odds_ratio": -0.5522245764732361, "logits/chosen": -0.7826690673828125, "logits/rejected": -0.7186380624771118, "logps/chosen": -1.934792160987854, "logps/rejected": -2.2128076553344727, "loss": 3.9311, "nll_loss": 0.9275611639022827, "rewards/accuracies": 0.875, "rewards/chosen": -0.19347921013832092, "rewards/margins": 0.02780154161155224, "rewards/rejected": -0.2212807685136795, "step": 70 }, { "epoch": 0.1872116018457482, "grad_norm": 14.627659797668457, "learning_rate": 7.566992014196982e-06, "log_odds_chosen": 0.38591426610946655, "log_odds_ratio": -0.5270495414733887, "logits/chosen": -0.7557522058486938, "logits/rejected": -0.7283233404159546, "logps/chosen": -1.9029791355133057, "logps/rejected": -2.241738796234131, "loss": 3.7175, "nll_loss": 0.8766695261001587, "rewards/accuracies": 0.875, "rewards/chosen": -0.1902979165315628, "rewards/margins": 0.0338759645819664, "rewards/rejected": -0.2241738736629486, "step": 71 }, { "epoch": 0.18984838497033618, "grad_norm": 14.73865795135498, "learning_rate": 7.559893522626442e-06, "log_odds_chosen": 0.1897004246711731, "log_odds_ratio": -0.6099416017532349, "logits/chosen": -0.7527236938476562, "logits/rejected": -0.7232762575149536, "logps/chosen": -1.9559646844863892, "logps/rejected": -2.1254239082336426, "loss": 3.8146, "nll_loss": 0.8926578164100647, "rewards/accuracies": 0.875, "rewards/chosen": -0.19559648633003235, "rewards/margins": 0.01694592274725437, "rewards/rejected": -0.21254241466522217, "step": 72 }, { "epoch": 0.1924851680949242, "grad_norm": 13.457784652709961, "learning_rate": 7.5527950310559e-06, "log_odds_chosen": 0.5551372170448303, "log_odds_ratio": -0.46496695280075073, "logits/chosen": -0.8097226619720459, "logits/rejected": -0.7585113048553467, "logps/chosen": -1.8708417415618896, "logps/rejected": -2.3605213165283203, "loss": 3.5045, "nll_loss": 0.8296377062797546, "rewards/accuracies": 1.0, "rewards/chosen": -0.1870841681957245, "rewards/margins": 0.04896795004606247, "rewards/rejected": -0.23605212569236755, "step": 73 }, { "epoch": 0.1951219512195122, "grad_norm": 13.562307357788086, "learning_rate": 7.545696539485359e-06, "log_odds_chosen": 0.28339290618896484, "log_odds_ratio": -0.5632253885269165, "logits/chosen": -0.7512806057929993, "logits/rejected": -0.7221646904945374, "logps/chosen": -1.907462477684021, "logps/rejected": -2.1500632762908936, "loss": 3.4105, "nll_loss": 0.7963072657585144, "rewards/accuracies": 1.0, "rewards/chosen": -0.1907462477684021, "rewards/margins": 0.024260083213448524, "rewards/rejected": -0.21500633656978607, "step": 74 }, { "epoch": 0.19775873434410018, "grad_norm": 13.646141052246094, "learning_rate": 7.538598047914818e-06, "log_odds_chosen": 0.5973946452140808, "log_odds_ratio": -0.4542398750782013, "logits/chosen": -0.7698791027069092, "logits/rejected": -0.7156640887260437, "logps/chosen": -1.7305474281311035, "logps/rejected": -2.2397546768188477, "loss": 3.325, "nll_loss": 0.7858337759971619, "rewards/accuracies": 1.0, "rewards/chosen": -0.17305472493171692, "rewards/margins": 0.05092073976993561, "rewards/rejected": -0.22397547960281372, "step": 75 }, { "epoch": 0.2003955174686882, "grad_norm": 13.815109252929688, "learning_rate": 7.531499556344276e-06, "log_odds_chosen": 0.0341314971446991, "log_odds_ratio": -0.6800359487533569, "logits/chosen": -0.7650290131568909, "logits/rejected": -0.717841386795044, "logps/chosen": -2.1082820892333984, "logps/rejected": -2.138892889022827, "loss": 3.5816, "nll_loss": 0.8274069428443909, "rewards/accuracies": 0.625, "rewards/chosen": -0.21082819998264313, "rewards/margins": 0.00306110642850399, "rewards/rejected": -0.21388930082321167, "step": 76 }, { "epoch": 0.2030323005932762, "grad_norm": 12.866390228271484, "learning_rate": 7.524401064773736e-06, "log_odds_chosen": 0.18123148381710052, "log_odds_ratio": -0.6105220317840576, "logits/chosen": -0.8196970820426941, "logits/rejected": -0.7854353189468384, "logps/chosen": -1.993229866027832, "logps/rejected": -2.1471104621887207, "loss": 3.7596, "nll_loss": 0.8788573741912842, "rewards/accuracies": 0.875, "rewards/chosen": -0.19932299852371216, "rewards/margins": 0.015388060361146927, "rewards/rejected": -0.21471107006072998, "step": 77 }, { "epoch": 0.20566908371786422, "grad_norm": 13.226037979125977, "learning_rate": 7.517302573203194e-06, "log_odds_chosen": 0.26490336656570435, "log_odds_ratio": -0.577185332775116, "logits/chosen": -0.8312850594520569, "logits/rejected": -0.7897067070007324, "logps/chosen": -1.751774549484253, "logps/rejected": -1.9779562950134277, "loss": 2.9881, "nll_loss": 0.6893104314804077, "rewards/accuracies": 0.875, "rewards/chosen": -0.1751774549484253, "rewards/margins": 0.022618159651756287, "rewards/rejected": -0.19779562950134277, "step": 78 }, { "epoch": 0.2083058668424522, "grad_norm": 12.413025856018066, "learning_rate": 7.510204081632653e-06, "log_odds_chosen": 0.20791912078857422, "log_odds_ratio": -0.6025508046150208, "logits/chosen": -0.8755151629447937, "logits/rejected": -0.8113049268722534, "logps/chosen": -1.8642596006393433, "logps/rejected": -2.0363807678222656, "loss": 3.5737, "nll_loss": 0.8331717252731323, "rewards/accuracies": 0.75, "rewards/chosen": -0.18642596900463104, "rewards/margins": 0.01721210777759552, "rewards/rejected": -0.20363807678222656, "step": 79 }, { "epoch": 0.2109426499670402, "grad_norm": 12.264641761779785, "learning_rate": 7.503105590062112e-06, "log_odds_chosen": 0.45694607496261597, "log_odds_ratio": -0.516982913017273, "logits/chosen": -0.9060852527618408, "logits/rejected": -0.8781797885894775, "logps/chosen": -1.6740509271621704, "logps/rejected": -2.0151681900024414, "loss": 3.312, "nll_loss": 0.7763122320175171, "rewards/accuracies": 0.875, "rewards/chosen": -0.16740509867668152, "rewards/margins": 0.03411173075437546, "rewards/rejected": -0.20151683688163757, "step": 80 }, { "epoch": 0.21357943309162822, "grad_norm": 12.642634391784668, "learning_rate": 7.49600709849157e-06, "log_odds_chosen": 0.24207936227321625, "log_odds_ratio": -0.5933969020843506, "logits/chosen": -0.9877604246139526, "logits/rejected": -0.9305794835090637, "logps/chosen": -1.8099536895751953, "logps/rejected": -2.010237455368042, "loss": 4.1916, "nll_loss": 0.9885674715042114, "rewards/accuracies": 0.875, "rewards/chosen": -0.180995374917984, "rewards/margins": 0.02002836763858795, "rewards/rejected": -0.20102375745773315, "step": 81 }, { "epoch": 0.21621621621621623, "grad_norm": 12.362634658813477, "learning_rate": 7.488908606921029e-06, "log_odds_chosen": 0.2158462405204773, "log_odds_ratio": -0.5972849130630493, "logits/chosen": -0.9262905716896057, "logits/rejected": -0.875029981136322, "logps/chosen": -1.7606620788574219, "logps/rejected": -1.9388712644577026, "loss": 2.9317, "nll_loss": 0.6732062697410583, "rewards/accuracies": 0.875, "rewards/chosen": -0.17606621980667114, "rewards/margins": 0.017820894718170166, "rewards/rejected": -0.1938871145248413, "step": 82 }, { "epoch": 0.2188529993408042, "grad_norm": 11.505016326904297, "learning_rate": 7.4818101153504875e-06, "log_odds_chosen": 0.3416220247745514, "log_odds_ratio": -0.5673665404319763, "logits/chosen": -0.9135525822639465, "logits/rejected": -0.8375486135482788, "logps/chosen": -1.6488395929336548, "logps/rejected": -1.938331127166748, "loss": 2.8808, "nll_loss": 0.6634570360183716, "rewards/accuracies": 0.75, "rewards/chosen": -0.16488397121429443, "rewards/margins": 0.02894916944205761, "rewards/rejected": -0.1938331425189972, "step": 83 }, { "epoch": 0.22148978246539222, "grad_norm": 12.884148597717285, "learning_rate": 7.474711623779946e-06, "log_odds_chosen": 0.4032338261604309, "log_odds_ratio": -0.5203972458839417, "logits/chosen": -0.9477750658988953, "logits/rejected": -0.9046843647956848, "logps/chosen": -1.8901233673095703, "logps/rejected": -2.2456259727478027, "loss": 3.3532, "nll_loss": 0.7862505912780762, "rewards/accuracies": 1.0, "rewards/chosen": -0.189012348651886, "rewards/margins": 0.035550251603126526, "rewards/rejected": -0.2245626151561737, "step": 84 }, { "epoch": 0.22412656558998023, "grad_norm": 11.539673805236816, "learning_rate": 7.467613132209405e-06, "log_odds_chosen": 0.13325880467891693, "log_odds_ratio": -0.6450076103210449, "logits/chosen": -0.9659244418144226, "logits/rejected": -0.900475263595581, "logps/chosen": -1.8413825035095215, "logps/rejected": -1.9536837339401245, "loss": 3.0989, "nll_loss": 0.7102184891700745, "rewards/accuracies": 0.5, "rewards/chosen": -0.1841382384300232, "rewards/margins": 0.011230124160647392, "rewards/rejected": -0.19536837935447693, "step": 85 }, { "epoch": 0.2267633487145682, "grad_norm": 12.20140552520752, "learning_rate": 7.460514640638864e-06, "log_odds_chosen": 0.178244948387146, "log_odds_ratio": -0.6124775409698486, "logits/chosen": -0.9916198253631592, "logits/rejected": -0.9214251041412354, "logps/chosen": -1.8683059215545654, "logps/rejected": -2.014869213104248, "loss": 3.8124, "nll_loss": 0.8918424844741821, "rewards/accuracies": 0.875, "rewards/chosen": -0.18683059513568878, "rewards/margins": 0.014656316488981247, "rewards/rejected": -0.20148691534996033, "step": 86 }, { "epoch": 0.22940013183915622, "grad_norm": 12.308550834655762, "learning_rate": 7.453416149068322e-06, "log_odds_chosen": 0.07327182590961456, "log_odds_ratio": -0.6613773107528687, "logits/chosen": -0.9867488741874695, "logits/rejected": -0.9688645601272583, "logps/chosen": -1.8495866060256958, "logps/rejected": -1.9149181842803955, "loss": 3.2487, "nll_loss": 0.7460330128669739, "rewards/accuracies": 0.625, "rewards/chosen": -0.18495866656303406, "rewards/margins": 0.006533162668347359, "rewards/rejected": -0.19149181246757507, "step": 87 }, { "epoch": 0.23203691496374423, "grad_norm": 11.27050495147705, "learning_rate": 7.446317657497782e-06, "log_odds_chosen": 0.2877511978149414, "log_odds_ratio": -0.5665717124938965, "logits/chosen": -1.0289583206176758, "logits/rejected": -0.9562087059020996, "logps/chosen": -1.7832393646240234, "logps/rejected": -2.0177910327911377, "loss": 2.9845, "nll_loss": 0.689464271068573, "rewards/accuracies": 0.875, "rewards/chosen": -0.1783239245414734, "rewards/margins": 0.02345517836511135, "rewards/rejected": -0.2017790973186493, "step": 88 }, { "epoch": 0.23467369808833224, "grad_norm": 11.31041145324707, "learning_rate": 7.43921916592724e-06, "log_odds_chosen": 0.2017640769481659, "log_odds_ratio": -0.6055243015289307, "logits/chosen": -1.0026249885559082, "logits/rejected": -0.9408939480781555, "logps/chosen": -1.8777215480804443, "logps/rejected": -2.05841064453125, "loss": 3.0338, "nll_loss": 0.6978951096534729, "rewards/accuracies": 0.75, "rewards/chosen": -0.18777216970920563, "rewards/margins": 0.01806892268359661, "rewards/rejected": -0.2058410793542862, "step": 89 }, { "epoch": 0.23731048121292023, "grad_norm": 11.861611366271973, "learning_rate": 7.432120674356698e-06, "log_odds_chosen": 0.3642200827598572, "log_odds_ratio": -0.5369521379470825, "logits/chosen": -1.0210542678833008, "logits/rejected": -0.9698840975761414, "logps/chosen": -1.7520647048950195, "logps/rejected": -2.046531915664673, "loss": 3.3146, "nll_loss": 0.7749478816986084, "rewards/accuracies": 0.875, "rewards/chosen": -0.1752064824104309, "rewards/margins": 0.02944672666490078, "rewards/rejected": -0.20465320348739624, "step": 90 }, { "epoch": 0.23994726433750824, "grad_norm": 11.018975257873535, "learning_rate": 7.425022182786158e-06, "log_odds_chosen": 0.36533892154693604, "log_odds_ratio": -0.534146785736084, "logits/chosen": -0.9736407995223999, "logits/rejected": -0.9505970478057861, "logps/chosen": -1.6512267589569092, "logps/rejected": -1.9399025440216064, "loss": 2.5182, "nll_loss": 0.5761348009109497, "rewards/accuracies": 0.875, "rewards/chosen": -0.16512268781661987, "rewards/margins": 0.028867574408650398, "rewards/rejected": -0.19399026036262512, "step": 91 }, { "epoch": 0.24258404746209625, "grad_norm": 10.942045211791992, "learning_rate": 7.417923691215616e-06, "log_odds_chosen": 0.2782820463180542, "log_odds_ratio": -0.587384819984436, "logits/chosen": -1.009594202041626, "logits/rejected": -0.9647431373596191, "logps/chosen": -1.606307029724121, "logps/rejected": -1.8420860767364502, "loss": 2.7104, "nll_loss": 0.6188517808914185, "rewards/accuracies": 0.75, "rewards/chosen": -0.16063068807125092, "rewards/margins": 0.023577921092510223, "rewards/rejected": -0.18420860171318054, "step": 92 }, { "epoch": 0.24522083058668426, "grad_norm": 11.825458526611328, "learning_rate": 7.410825199645076e-06, "log_odds_chosen": 0.19983872771263123, "log_odds_ratio": -0.605749785900116, "logits/chosen": -1.0021772384643555, "logits/rejected": -0.9581748247146606, "logps/chosen": -1.8522577285766602, "logps/rejected": -2.021942615509033, "loss": 3.1105, "nll_loss": 0.7170413732528687, "rewards/accuracies": 0.875, "rewards/chosen": -0.18522579967975616, "rewards/margins": 0.016968462616205215, "rewards/rejected": -0.2021942436695099, "step": 93 }, { "epoch": 0.24785761371127224, "grad_norm": 10.83332633972168, "learning_rate": 7.403726708074534e-06, "log_odds_chosen": 0.16420339047908783, "log_odds_ratio": -0.6234915256500244, "logits/chosen": -1.0329029560089111, "logits/rejected": -0.9871702194213867, "logps/chosen": -1.8671634197235107, "logps/rejected": -2.0037567615509033, "loss": 3.0731, "nll_loss": 0.7059216499328613, "rewards/accuracies": 0.75, "rewards/chosen": -0.18671634793281555, "rewards/margins": 0.013659341260790825, "rewards/rejected": -0.20037567615509033, "step": 94 }, { "epoch": 0.2504943968358603, "grad_norm": 11.22410774230957, "learning_rate": 7.3966282165039926e-06, "log_odds_chosen": 0.1453307718038559, "log_odds_ratio": -0.628348171710968, "logits/chosen": -1.0054070949554443, "logits/rejected": -0.9577959775924683, "logps/chosen": -1.8484101295471191, "logps/rejected": -1.9695494174957275, "loss": 3.3057, "nll_loss": 0.7635930776596069, "rewards/accuracies": 0.75, "rewards/chosen": -0.18484100699424744, "rewards/margins": 0.012113936245441437, "rewards/rejected": -0.19695493578910828, "step": 95 }, { "epoch": 0.25313117996044826, "grad_norm": 10.52470588684082, "learning_rate": 7.3895297249334515e-06, "log_odds_chosen": 0.392825186252594, "log_odds_ratio": -0.5319700241088867, "logits/chosen": -1.032947063446045, "logits/rejected": -0.9472732543945312, "logps/chosen": -1.6224782466888428, "logps/rejected": -1.9548505544662476, "loss": 2.8907, "nll_loss": 0.6694746017456055, "rewards/accuracies": 0.75, "rewards/chosen": -0.16224783658981323, "rewards/margins": 0.03323723375797272, "rewards/rejected": -0.19548507034778595, "step": 96 }, { "epoch": 0.25576796308503624, "grad_norm": 11.16720962524414, "learning_rate": 7.38243123336291e-06, "log_odds_chosen": 0.28609395027160645, "log_odds_ratio": -0.5633273720741272, "logits/chosen": -1.0286046266555786, "logits/rejected": -0.9637259244918823, "logps/chosen": -1.7094495296478271, "logps/rejected": -1.9480677843093872, "loss": 3.3181, "nll_loss": 0.7731999158859253, "rewards/accuracies": 1.0, "rewards/chosen": -0.1709449589252472, "rewards/margins": 0.023861827328801155, "rewards/rejected": -0.194806769490242, "step": 97 }, { "epoch": 0.2584047462096243, "grad_norm": 11.650616645812988, "learning_rate": 7.375332741792368e-06, "log_odds_chosen": 0.4847567677497864, "log_odds_ratio": -0.5067976117134094, "logits/chosen": -1.0179671049118042, "logits/rejected": -0.9550538659095764, "logps/chosen": -1.6734848022460938, "logps/rejected": -2.0797762870788574, "loss": 3.3886, "nll_loss": 0.7964633107185364, "rewards/accuracies": 0.875, "rewards/chosen": -0.1673484742641449, "rewards/margins": 0.04062914848327637, "rewards/rejected": -0.20797762274742126, "step": 98 }, { "epoch": 0.26104152933421226, "grad_norm": 11.267426490783691, "learning_rate": 7.368234250221827e-06, "log_odds_chosen": 0.3002548813819885, "log_odds_ratio": -0.5628231763839722, "logits/chosen": -1.0801610946655273, "logits/rejected": -0.9543227553367615, "logps/chosen": -1.6145151853561401, "logps/rejected": -1.8697000741958618, "loss": 3.1738, "nll_loss": 0.7371575236320496, "rewards/accuracies": 0.875, "rewards/chosen": -0.161451518535614, "rewards/margins": 0.02551848441362381, "rewards/rejected": -0.18696999549865723, "step": 99 }, { "epoch": 0.26367831245880025, "grad_norm": 10.404102325439453, "learning_rate": 7.361135758651286e-06, "log_odds_chosen": 0.09879818558692932, "log_odds_ratio": -0.6502638459205627, "logits/chosen": -0.9633969068527222, "logits/rejected": -0.9362624883651733, "logps/chosen": -1.6580994129180908, "logps/rejected": -1.7272868156433105, "loss": 2.69, "nll_loss": 0.6074641346931458, "rewards/accuracies": 0.625, "rewards/chosen": -0.16580992937088013, "rewards/margins": 0.006918755359947681, "rewards/rejected": -0.17272868752479553, "step": 100 }, { "epoch": 0.2663150955833883, "grad_norm": 10.600974082946777, "learning_rate": 7.354037267080744e-06, "log_odds_chosen": 0.29593682289123535, "log_odds_ratio": -0.5641055107116699, "logits/chosen": -1.0604907274246216, "logits/rejected": -0.9663894176483154, "logps/chosen": -1.7292687892913818, "logps/rejected": -1.9780974388122559, "loss": 2.912, "nll_loss": 0.6715894937515259, "rewards/accuracies": 0.75, "rewards/chosen": -0.1729268729686737, "rewards/margins": 0.024882866069674492, "rewards/rejected": -0.19780975580215454, "step": 101 }, { "epoch": 0.26895187870797627, "grad_norm": 11.276390075683594, "learning_rate": 7.346938775510204e-06, "log_odds_chosen": 0.36808863282203674, "log_odds_ratio": -0.5319298505783081, "logits/chosen": -1.072847843170166, "logits/rejected": -0.9937724471092224, "logps/chosen": -1.7114295959472656, "logps/rejected": -2.0213842391967773, "loss": 3.504, "nll_loss": 0.8228154182434082, "rewards/accuracies": 1.0, "rewards/chosen": -0.17114296555519104, "rewards/margins": 0.03099547140300274, "rewards/rejected": -0.20213845372200012, "step": 102 }, { "epoch": 0.27158866183256425, "grad_norm": 10.443772315979004, "learning_rate": 7.339840283939662e-06, "log_odds_chosen": 0.634390115737915, "log_odds_ratio": -0.4645514190196991, "logits/chosen": -1.0298651456832886, "logits/rejected": -0.9478103518486023, "logps/chosen": -1.4557925462722778, "logps/rejected": -1.982097864151001, "loss": 2.5977, "nll_loss": 0.6029644012451172, "rewards/accuracies": 1.0, "rewards/chosen": -0.1455792486667633, "rewards/margins": 0.052630532532930374, "rewards/rejected": -0.19820979237556458, "step": 103 }, { "epoch": 0.2742254449571523, "grad_norm": 10.725059509277344, "learning_rate": 7.332741792369122e-06, "log_odds_chosen": 0.23163697123527527, "log_odds_ratio": -0.5890691876411438, "logits/chosen": -1.0835130214691162, "logits/rejected": -1.0381077527999878, "logps/chosen": -1.6009089946746826, "logps/rejected": -1.7853424549102783, "loss": 3.0235, "nll_loss": 0.6969616413116455, "rewards/accuracies": 1.0, "rewards/chosen": -0.16009089350700378, "rewards/margins": 0.018443342298269272, "rewards/rejected": -0.17853423953056335, "step": 104 }, { "epoch": 0.27686222808174027, "grad_norm": 10.879688262939453, "learning_rate": 7.32564330079858e-06, "log_odds_chosen": 0.26793116331100464, "log_odds_ratio": -0.578827977180481, "logits/chosen": -1.0219061374664307, "logits/rejected": -0.9625946879386902, "logps/chosen": -1.7450555562973022, "logps/rejected": -1.9703395366668701, "loss": 3.4718, "nll_loss": 0.8100571632385254, "rewards/accuracies": 0.75, "rewards/chosen": -0.1745055615901947, "rewards/margins": 0.022528400644659996, "rewards/rejected": -0.19703397154808044, "step": 105 }, { "epoch": 0.2794990112063283, "grad_norm": 11.196900367736816, "learning_rate": 7.318544809228039e-06, "log_odds_chosen": 0.29670941829681396, "log_odds_ratio": -0.5586762428283691, "logits/chosen": -1.0425081253051758, "logits/rejected": -0.961692750453949, "logps/chosen": -1.6713957786560059, "logps/rejected": -1.9169774055480957, "loss": 3.293, "nll_loss": 0.7673801183700562, "rewards/accuracies": 1.0, "rewards/chosen": -0.16713958978652954, "rewards/margins": 0.02455814741551876, "rewards/rejected": -0.19169773161411285, "step": 106 }, { "epoch": 0.2821357943309163, "grad_norm": 10.09378433227539, "learning_rate": 7.311446317657498e-06, "log_odds_chosen": 0.18554000556468964, "log_odds_ratio": -0.607377290725708, "logits/chosen": -1.0471107959747314, "logits/rejected": -0.9914209842681885, "logps/chosen": -1.6027867794036865, "logps/rejected": -1.75523042678833, "loss": 2.7016, "nll_loss": 0.6146624684333801, "rewards/accuracies": 0.875, "rewards/chosen": -0.16027866303920746, "rewards/margins": 0.015244370326399803, "rewards/rejected": -0.17552302777767181, "step": 107 }, { "epoch": 0.28477257745550427, "grad_norm": 11.160083770751953, "learning_rate": 7.304347826086956e-06, "log_odds_chosen": 0.24081920087337494, "log_odds_ratio": -0.5894144773483276, "logits/chosen": -1.0816594362258911, "logits/rejected": -0.9936416149139404, "logps/chosen": -1.6756263971328735, "logps/rejected": -1.8702760934829712, "loss": 3.175, "nll_loss": 0.7348129153251648, "rewards/accuracies": 0.875, "rewards/chosen": -0.16756264865398407, "rewards/margins": 0.01946496218442917, "rewards/rejected": -0.18702760338783264, "step": 108 }, { "epoch": 0.2874093605800923, "grad_norm": 10.403040885925293, "learning_rate": 7.297249334516415e-06, "log_odds_chosen": 0.15198183059692383, "log_odds_ratio": -0.6254088878631592, "logits/chosen": -1.009718894958496, "logits/rejected": -0.9877020120620728, "logps/chosen": -1.6084790229797363, "logps/rejected": -1.7322394847869873, "loss": 2.6562, "nll_loss": 0.6015002727508545, "rewards/accuracies": 0.75, "rewards/chosen": -0.16084790229797363, "rewards/margins": 0.012376044876873493, "rewards/rejected": -0.17322394251823425, "step": 109 }, { "epoch": 0.2900461437046803, "grad_norm": 10.95654010772705, "learning_rate": 7.2901508429458735e-06, "log_odds_chosen": 0.19570016860961914, "log_odds_ratio": -0.6077170372009277, "logits/chosen": -1.0052906274795532, "logits/rejected": -0.9451830983161926, "logps/chosen": -1.676623821258545, "logps/rejected": -1.8326776027679443, "loss": 2.5621, "nll_loss": 0.5797582268714905, "rewards/accuracies": 0.75, "rewards/chosen": -0.1676623672246933, "rewards/margins": 0.015605399385094643, "rewards/rejected": -0.1832677721977234, "step": 110 }, { "epoch": 0.2926829268292683, "grad_norm": 11.047690391540527, "learning_rate": 7.283052351375332e-06, "log_odds_chosen": 0.27795833349227905, "log_odds_ratio": -0.5818898677825928, "logits/chosen": -1.0167365074157715, "logits/rejected": -0.9470656514167786, "logps/chosen": -1.6907787322998047, "logps/rejected": -1.9295769929885864, "loss": 2.6876, "nll_loss": 0.613703191280365, "rewards/accuracies": 0.625, "rewards/chosen": -0.16907787322998047, "rewards/margins": 0.023879820480942726, "rewards/rejected": -0.19295769929885864, "step": 111 }, { "epoch": 0.2953197099538563, "grad_norm": 9.781204223632812, "learning_rate": 7.275953859804791e-06, "log_odds_chosen": 0.37173882126808167, "log_odds_ratio": -0.5375426411628723, "logits/chosen": -1.0105595588684082, "logits/rejected": -0.9397248029708862, "logps/chosen": -1.6948808431625366, "logps/rejected": -2.002723217010498, "loss": 2.4168, "nll_loss": 0.5504346489906311, "rewards/accuracies": 1.0, "rewards/chosen": -0.1694880872964859, "rewards/margins": 0.030784228816628456, "rewards/rejected": -0.2002723217010498, "step": 112 }, { "epoch": 0.2979564930784443, "grad_norm": 9.886716842651367, "learning_rate": 7.26885536823425e-06, "log_odds_chosen": 0.30898672342300415, "log_odds_ratio": -0.5617222785949707, "logits/chosen": -1.0012271404266357, "logits/rejected": -0.9353881478309631, "logps/chosen": -1.5099105834960938, "logps/rejected": -1.7610509395599365, "loss": 2.3777, "nll_loss": 0.5382523536682129, "rewards/accuracies": 0.875, "rewards/chosen": -0.1509910672903061, "rewards/margins": 0.0251140296459198, "rewards/rejected": -0.17610511183738708, "step": 113 }, { "epoch": 0.3005932762030323, "grad_norm": 11.912915229797363, "learning_rate": 7.261756876663708e-06, "log_odds_chosen": 0.4974936246871948, "log_odds_ratio": -0.5436064600944519, "logits/chosen": -1.089568853378296, "logits/rejected": -0.9672592878341675, "logps/chosen": -1.887178659439087, "logps/rejected": -2.3397083282470703, "loss": 3.9354, "nll_loss": 0.9295009970664978, "rewards/accuracies": 0.625, "rewards/chosen": -0.18871785700321198, "rewards/margins": 0.0452529713511467, "rewards/rejected": -0.23397082090377808, "step": 114 }, { "epoch": 0.3032300593276203, "grad_norm": 10.737141609191895, "learning_rate": 7.254658385093168e-06, "log_odds_chosen": 0.2533874213695526, "log_odds_ratio": -0.5820384621620178, "logits/chosen": -0.9657982587814331, "logits/rejected": -0.8962007761001587, "logps/chosen": -1.7736114263534546, "logps/rejected": -1.9921071529388428, "loss": 3.5912, "nll_loss": 0.8395951390266418, "rewards/accuracies": 0.75, "rewards/chosen": -0.1773611307144165, "rewards/margins": 0.021849587559700012, "rewards/rejected": -0.1992107331752777, "step": 115 }, { "epoch": 0.3058668424522083, "grad_norm": 10.761970520019531, "learning_rate": 7.247559893522626e-06, "log_odds_chosen": 0.3130797743797302, "log_odds_ratio": -0.5591657161712646, "logits/chosen": -0.9927374124526978, "logits/rejected": -0.9318053722381592, "logps/chosen": -1.5499687194824219, "logps/rejected": -1.8103137016296387, "loss": 2.553, "nll_loss": 0.5823217034339905, "rewards/accuracies": 0.875, "rewards/chosen": -0.1549968719482422, "rewards/margins": 0.026034509763121605, "rewards/rejected": -0.18103139102458954, "step": 116 }, { "epoch": 0.3085036255767963, "grad_norm": 11.074972152709961, "learning_rate": 7.240461401952084e-06, "log_odds_chosen": 0.2793109118938446, "log_odds_ratio": -0.5649005174636841, "logits/chosen": -1.0155889987945557, "logits/rejected": -0.9369997978210449, "logps/chosen": -1.757372498512268, "logps/rejected": -1.9916359186172485, "loss": 2.9984, "nll_loss": 0.6931184530258179, "rewards/accuracies": 1.0, "rewards/chosen": -0.17573726177215576, "rewards/margins": 0.023426339030265808, "rewards/rejected": -0.19916360080242157, "step": 117 }, { "epoch": 0.3111404087013843, "grad_norm": 10.123946189880371, "learning_rate": 7.233362910381544e-06, "log_odds_chosen": 0.48958620429039, "log_odds_ratio": -0.5133712887763977, "logits/chosen": -0.988548994064331, "logits/rejected": -0.906058669090271, "logps/chosen": -1.6181275844573975, "logps/rejected": -2.0276811122894287, "loss": 3.082, "nll_loss": 0.7191726565361023, "rewards/accuracies": 1.0, "rewards/chosen": -0.16181275248527527, "rewards/margins": 0.04095536097884178, "rewards/rejected": -0.20276810228824615, "step": 118 }, { "epoch": 0.3137771918259723, "grad_norm": 9.875753402709961, "learning_rate": 7.226264418811002e-06, "log_odds_chosen": 0.27764010429382324, "log_odds_ratio": -0.5703195929527283, "logits/chosen": -0.9880169630050659, "logits/rejected": -0.9102073907852173, "logps/chosen": -1.6844210624694824, "logps/rejected": -1.9171109199523926, "loss": 2.8216, "nll_loss": 0.6483787894248962, "rewards/accuracies": 0.875, "rewards/chosen": -0.16844210028648376, "rewards/margins": 0.023268993943929672, "rewards/rejected": -0.19171112775802612, "step": 119 }, { "epoch": 0.31641397495056034, "grad_norm": 11.56274700164795, "learning_rate": 7.219165927240462e-06, "log_odds_chosen": 0.36386311054229736, "log_odds_ratio": -0.5368889570236206, "logits/chosen": -0.8859333395957947, "logits/rejected": -0.8559818267822266, "logps/chosen": -1.591422438621521, "logps/rejected": -1.8891998529434204, "loss": 2.8655, "nll_loss": 0.6626745462417603, "rewards/accuracies": 0.75, "rewards/chosen": -0.15914225578308105, "rewards/margins": 0.02977774105966091, "rewards/rejected": -0.18891999125480652, "step": 120 }, { "epoch": 0.3190507580751483, "grad_norm": 10.839788436889648, "learning_rate": 7.21206743566992e-06, "log_odds_chosen": 0.2564637064933777, "log_odds_ratio": -0.5788425803184509, "logits/chosen": -1.0680105686187744, "logits/rejected": -0.9847425222396851, "logps/chosen": -1.6119345426559448, "logps/rejected": -1.8226040601730347, "loss": 3.0163, "nll_loss": 0.6961902379989624, "rewards/accuracies": 1.0, "rewards/chosen": -0.16119346022605896, "rewards/margins": 0.021066950634121895, "rewards/rejected": -0.1822603940963745, "step": 121 }, { "epoch": 0.3216875411997363, "grad_norm": 10.007757186889648, "learning_rate": 7.204968944099379e-06, "log_odds_chosen": 0.2848505973815918, "log_odds_ratio": -0.5624546408653259, "logits/chosen": -1.0049948692321777, "logits/rejected": -0.9579980969429016, "logps/chosen": -1.6587774753570557, "logps/rejected": -1.8923622369766235, "loss": 2.8742, "nll_loss": 0.6623063087463379, "rewards/accuracies": 1.0, "rewards/chosen": -0.16587774455547333, "rewards/margins": 0.023358486592769623, "rewards/rejected": -0.18923622369766235, "step": 122 }, { "epoch": 0.32432432432432434, "grad_norm": 10.4876070022583, "learning_rate": 7.1978704525288375e-06, "log_odds_chosen": 0.36797964572906494, "log_odds_ratio": -0.5324498414993286, "logits/chosen": -1.0323530435562134, "logits/rejected": -0.9425293803215027, "logps/chosen": -1.6165523529052734, "logps/rejected": -1.9190058708190918, "loss": 3.169, "nll_loss": 0.7390009760856628, "rewards/accuracies": 1.0, "rewards/chosen": -0.1616552472114563, "rewards/margins": 0.030245332047343254, "rewards/rejected": -0.1919005811214447, "step": 123 }, { "epoch": 0.3269611074489123, "grad_norm": 9.802966117858887, "learning_rate": 7.190771960958296e-06, "log_odds_chosen": 0.13589303195476532, "log_odds_ratio": -0.6351217031478882, "logits/chosen": -1.019148826599121, "logits/rejected": -0.9820281267166138, "logps/chosen": -1.5778555870056152, "logps/rejected": -1.6937252283096313, "loss": 2.7745, "nll_loss": 0.6301135420799255, "rewards/accuracies": 0.75, "rewards/chosen": -0.157785564661026, "rewards/margins": 0.011586972512304783, "rewards/rejected": -0.1693725287914276, "step": 124 }, { "epoch": 0.3295978905735003, "grad_norm": 10.657818794250488, "learning_rate": 7.1836734693877545e-06, "log_odds_chosen": 0.13183920085430145, "log_odds_ratio": -0.6340517401695251, "logits/chosen": -0.9704450368881226, "logits/rejected": -0.9316169619560242, "logps/chosen": -1.5081100463867188, "logps/rejected": -1.6166660785675049, "loss": 2.9152, "nll_loss": 0.6654000282287598, "rewards/accuracies": 0.875, "rewards/chosen": -0.15081101655960083, "rewards/margins": 0.010855593718588352, "rewards/rejected": -0.161666601896286, "step": 125 }, { "epoch": 0.33223467369808835, "grad_norm": 11.420533180236816, "learning_rate": 7.176574977817213e-06, "log_odds_chosen": 0.09573184698820114, "log_odds_ratio": -0.6518878936767578, "logits/chosen": -1.0469801425933838, "logits/rejected": -0.981776773929596, "logps/chosen": -1.6422678232192993, "logps/rejected": -1.7189463376998901, "loss": 3.4448, "nll_loss": 0.7960150837898254, "rewards/accuracies": 0.5, "rewards/chosen": -0.16422678530216217, "rewards/margins": 0.007667848840355873, "rewards/rejected": -0.1718946397304535, "step": 126 }, { "epoch": 0.3348714568226763, "grad_norm": 10.23043441772461, "learning_rate": 7.169476486246672e-06, "log_odds_chosen": 0.2894551753997803, "log_odds_ratio": -0.5663176774978638, "logits/chosen": -1.011500358581543, "logits/rejected": -0.9723880887031555, "logps/chosen": -1.5216894149780273, "logps/rejected": -1.7557767629623413, "loss": 2.8609, "nll_loss": 0.6585900187492371, "rewards/accuracies": 1.0, "rewards/chosen": -0.15216895937919617, "rewards/margins": 0.02340874634683132, "rewards/rejected": -0.17557770013809204, "step": 127 }, { "epoch": 0.3375082399472643, "grad_norm": 9.419966697692871, "learning_rate": 7.16237799467613e-06, "log_odds_chosen": 0.5059428811073303, "log_odds_ratio": -0.5042520761489868, "logits/chosen": -0.993411123752594, "logits/rejected": -0.8944643139839172, "logps/chosen": -1.553409457206726, "logps/rejected": -1.9687423706054688, "loss": 2.466, "nll_loss": 0.5660730600357056, "rewards/accuracies": 0.875, "rewards/chosen": -0.15534095466136932, "rewards/margins": 0.04153328761458397, "rewards/rejected": -0.1968742460012436, "step": 128 }, { "epoch": 0.34014502307185235, "grad_norm": 11.141188621520996, "learning_rate": 7.15527950310559e-06, "log_odds_chosen": 0.28306683897972107, "log_odds_ratio": -0.5711504817008972, "logits/chosen": -0.9414308667182922, "logits/rejected": -0.8877083659172058, "logps/chosen": -1.7126646041870117, "logps/rejected": -1.9460575580596924, "loss": 3.4852, "nll_loss": 0.8141910433769226, "rewards/accuracies": 0.75, "rewards/chosen": -0.17126646637916565, "rewards/margins": 0.023339303210377693, "rewards/rejected": -0.1946057677268982, "step": 129 }, { "epoch": 0.34278180619644033, "grad_norm": 10.034116744995117, "learning_rate": 7.148181011535048e-06, "log_odds_chosen": 0.40334582328796387, "log_odds_ratio": -0.5304621458053589, "logits/chosen": -1.0032262802124023, "logits/rejected": -0.9937978982925415, "logps/chosen": -1.5170884132385254, "logps/rejected": -1.8561803102493286, "loss": 2.1653, "nll_loss": 0.4882797300815582, "rewards/accuracies": 0.875, "rewards/chosen": -0.15170885622501373, "rewards/margins": 0.033909182995557785, "rewards/rejected": -0.18561802804470062, "step": 130 }, { "epoch": 0.34541858932102837, "grad_norm": 11.854081153869629, "learning_rate": 7.141082519964508e-06, "log_odds_chosen": 0.26253604888916016, "log_odds_ratio": -0.5790401697158813, "logits/chosen": -1.0138558149337769, "logits/rejected": -0.8745430707931519, "logps/chosen": -1.7500555515289307, "logps/rejected": -1.9733306169509888, "loss": 3.988, "nll_loss": 0.9391055703163147, "rewards/accuracies": 0.875, "rewards/chosen": -0.17500554025173187, "rewards/margins": 0.022327521815896034, "rewards/rejected": -0.19733306765556335, "step": 131 }, { "epoch": 0.34805537244561635, "grad_norm": 10.411006927490234, "learning_rate": 7.133984028393966e-06, "log_odds_chosen": 0.1804034411907196, "log_odds_ratio": -0.6206458806991577, "logits/chosen": -1.014014720916748, "logits/rejected": -0.9626904129981995, "logps/chosen": -1.5718120336532593, "logps/rejected": -1.71071457862854, "loss": 2.8215, "nll_loss": 0.6433163285255432, "rewards/accuracies": 0.625, "rewards/chosen": -0.15718120336532593, "rewards/margins": 0.01389027014374733, "rewards/rejected": -0.17107146978378296, "step": 132 }, { "epoch": 0.35069215557020433, "grad_norm": 10.153091430664062, "learning_rate": 7.126885536823425e-06, "log_odds_chosen": 0.36744678020477295, "log_odds_ratio": -0.5389147996902466, "logits/chosen": -1.0156335830688477, "logits/rejected": -0.9082814455032349, "logps/chosen": -1.5546211004257202, "logps/rejected": -1.8483304977416992, "loss": 2.9649, "nll_loss": 0.6873387098312378, "rewards/accuracies": 1.0, "rewards/chosen": -0.1554621160030365, "rewards/margins": 0.02937093749642372, "rewards/rejected": -0.18483306467533112, "step": 133 }, { "epoch": 0.35332893869479237, "grad_norm": 10.791301727294922, "learning_rate": 7.119787045252884e-06, "log_odds_chosen": 0.2302854359149933, "log_odds_ratio": -0.5921584367752075, "logits/chosen": -1.0107197761535645, "logits/rejected": -0.9321932196617126, "logps/chosen": -1.6761658191680908, "logps/rejected": -1.8660277128219604, "loss": 3.1031, "nll_loss": 0.7165559530258179, "rewards/accuracies": 0.875, "rewards/chosen": -0.1676165759563446, "rewards/margins": 0.018986180424690247, "rewards/rejected": -0.18660277128219604, "step": 134 }, { "epoch": 0.35596572181938035, "grad_norm": 10.180030822753906, "learning_rate": 7.112688553682342e-06, "log_odds_chosen": 0.1279923915863037, "log_odds_ratio": -0.6372387409210205, "logits/chosen": -1.052191972732544, "logits/rejected": -0.9900830984115601, "logps/chosen": -1.6719307899475098, "logps/rejected": -1.7741820812225342, "loss": 2.889, "nll_loss": 0.6585352420806885, "rewards/accuracies": 0.75, "rewards/chosen": -0.16719307005405426, "rewards/margins": 0.010225137695670128, "rewards/rejected": -0.17741820216178894, "step": 135 }, { "epoch": 0.35860250494396834, "grad_norm": 11.69919490814209, "learning_rate": 7.105590062111801e-06, "log_odds_chosen": 0.2658090889453888, "log_odds_ratio": -0.5728399753570557, "logits/chosen": -0.9620678424835205, "logits/rejected": -0.8741431832313538, "logps/chosen": -1.8435702323913574, "logps/rejected": -2.0720698833465576, "loss": 3.7334, "nll_loss": 0.8760726451873779, "rewards/accuracies": 1.0, "rewards/chosen": -0.18435701727867126, "rewards/margins": 0.022849969565868378, "rewards/rejected": -0.20720697939395905, "step": 136 }, { "epoch": 0.3612392880685564, "grad_norm": 11.232748985290527, "learning_rate": 7.0984915705412596e-06, "log_odds_chosen": 0.11886944621801376, "log_odds_ratio": -0.6484676599502563, "logits/chosen": -0.9772336483001709, "logits/rejected": -0.9394592046737671, "logps/chosen": -1.7975430488586426, "logps/rejected": -1.8997228145599365, "loss": 3.1832, "nll_loss": 0.730952262878418, "rewards/accuracies": 0.625, "rewards/chosen": -0.17975430190563202, "rewards/margins": 0.010217983275651932, "rewards/rejected": -0.18997228145599365, "step": 137 }, { "epoch": 0.36387607119314436, "grad_norm": 9.921364784240723, "learning_rate": 7.0913930789707185e-06, "log_odds_chosen": 0.2520274221897125, "log_odds_ratio": -0.5773578882217407, "logits/chosen": -1.0217664241790771, "logits/rejected": -0.9497706890106201, "logps/chosen": -1.5283839702606201, "logps/rejected": -1.7314984798431396, "loss": 2.6404, "nll_loss": 0.6023762226104736, "rewards/accuracies": 1.0, "rewards/chosen": -0.15283840894699097, "rewards/margins": 0.02031143754720688, "rewards/rejected": -0.17314985394477844, "step": 138 }, { "epoch": 0.36651285431773234, "grad_norm": 10.041219711303711, "learning_rate": 7.0842945874001765e-06, "log_odds_chosen": 0.5415450930595398, "log_odds_ratio": -0.499300479888916, "logits/chosen": -1.0301642417907715, "logits/rejected": -0.9690549969673157, "logps/chosen": -1.467930793762207, "logps/rejected": -1.917942762374878, "loss": 2.6209, "nll_loss": 0.6052953004837036, "rewards/accuracies": 1.0, "rewards/chosen": -0.14679308235645294, "rewards/margins": 0.04500119388103485, "rewards/rejected": -0.1917942762374878, "step": 139 }, { "epoch": 0.3691496374423204, "grad_norm": 10.652571678161621, "learning_rate": 7.077196095829636e-06, "log_odds_chosen": 0.3802804946899414, "log_odds_ratio": -0.535650908946991, "logits/chosen": -1.0809695720672607, "logits/rejected": -1.0145455598831177, "logps/chosen": -1.6542302370071411, "logps/rejected": -1.9699711799621582, "loss": 3.3817, "nll_loss": 0.7918611764907837, "rewards/accuracies": 0.875, "rewards/chosen": -0.16542303562164307, "rewards/margins": 0.03157408535480499, "rewards/rejected": -0.19699713587760925, "step": 140 }, { "epoch": 0.37178642056690836, "grad_norm": 9.796138763427734, "learning_rate": 7.070097604259094e-06, "log_odds_chosen": 0.4387213885784149, "log_odds_ratio": -0.5061071515083313, "logits/chosen": -1.0042519569396973, "logits/rejected": -0.9524378776550293, "logps/chosen": -1.5022239685058594, "logps/rejected": -1.863775372505188, "loss": 2.5383, "nll_loss": 0.5839626789093018, "rewards/accuracies": 1.0, "rewards/chosen": -0.15022240579128265, "rewards/margins": 0.036155134439468384, "rewards/rejected": -0.18637755513191223, "step": 141 }, { "epoch": 0.3744232036914964, "grad_norm": 9.984831809997559, "learning_rate": 7.062999112688554e-06, "log_odds_chosen": 0.3700769245624542, "log_odds_ratio": -0.5443153381347656, "logits/chosen": -0.9836577773094177, "logits/rejected": -0.8929394483566284, "logps/chosen": -1.4981740713119507, "logps/rejected": -1.8099955320358276, "loss": 2.5823, "nll_loss": 0.5911481976509094, "rewards/accuracies": 0.75, "rewards/chosen": -0.14981740713119507, "rewards/margins": 0.031182145699858665, "rewards/rejected": -0.1809995472431183, "step": 142 }, { "epoch": 0.3770599868160844, "grad_norm": 9.898751258850098, "learning_rate": 7.055900621118012e-06, "log_odds_chosen": 0.20131006836891174, "log_odds_ratio": -0.6076530814170837, "logits/chosen": -0.9782786965370178, "logits/rejected": -0.9188566207885742, "logps/chosen": -1.6172821521759033, "logps/rejected": -1.7827471494674683, "loss": 2.7246, "nll_loss": 0.620381772518158, "rewards/accuracies": 0.75, "rewards/chosen": -0.16172820329666138, "rewards/margins": 0.016546515747904778, "rewards/rejected": -0.1782747209072113, "step": 143 }, { "epoch": 0.37969676994067236, "grad_norm": 12.325860023498535, "learning_rate": 7.04880212954747e-06, "log_odds_chosen": 0.15997439622879028, "log_odds_ratio": -0.6231362819671631, "logits/chosen": -0.9544463157653809, "logits/rejected": -0.938663125038147, "logps/chosen": -1.5355095863342285, "logps/rejected": -1.668229579925537, "loss": 3.154, "nll_loss": 0.7261742949485779, "rewards/accuracies": 0.75, "rewards/chosen": -0.15355095267295837, "rewards/margins": 0.013271997682750225, "rewards/rejected": -0.16682296991348267, "step": 144 }, { "epoch": 0.3823335530652604, "grad_norm": 10.043176651000977, "learning_rate": 7.04170363797693e-06, "log_odds_chosen": 0.4247394800186157, "log_odds_ratio": -0.5165842175483704, "logits/chosen": -0.9888424277305603, "logits/rejected": -0.9225731492042542, "logps/chosen": -1.6933906078338623, "logps/rejected": -2.0490479469299316, "loss": 2.5336, "nll_loss": 0.5817536115646362, "rewards/accuracies": 1.0, "rewards/chosen": -0.16933906078338623, "rewards/margins": 0.03556573763489723, "rewards/rejected": -0.20490480959415436, "step": 145 }, { "epoch": 0.3849703361898484, "grad_norm": 11.028958320617676, "learning_rate": 7.034605146406388e-06, "log_odds_chosen": 0.2050904929637909, "log_odds_ratio": -0.5977884531021118, "logits/chosen": -1.0422849655151367, "logits/rejected": -0.9397130012512207, "logps/chosen": -1.6625914573669434, "logps/rejected": -1.8326083421707153, "loss": 3.2628, "nll_loss": 0.7559204697608948, "rewards/accuracies": 1.0, "rewards/chosen": -0.16625916957855225, "rewards/margins": 0.017001673579216003, "rewards/rejected": -0.18326082825660706, "step": 146 }, { "epoch": 0.38760711931443637, "grad_norm": 10.177428245544434, "learning_rate": 7.027506654835847e-06, "log_odds_chosen": 0.10629764944314957, "log_odds_ratio": -0.6427984237670898, "logits/chosen": -1.002443790435791, "logits/rejected": -0.9608904719352722, "logps/chosen": -1.641714096069336, "logps/rejected": -1.7274234294891357, "loss": 2.886, "nll_loss": 0.6572229862213135, "rewards/accuracies": 0.875, "rewards/chosen": -0.16417142748832703, "rewards/margins": 0.00857093557715416, "rewards/rejected": -0.1727423369884491, "step": 147 }, { "epoch": 0.3902439024390244, "grad_norm": 10.105752944946289, "learning_rate": 7.020408163265306e-06, "log_odds_chosen": 0.3024708032608032, "log_odds_ratio": -0.5647812485694885, "logits/chosen": -0.9816720485687256, "logits/rejected": -0.9124754667282104, "logps/chosen": -1.5694180727005005, "logps/rejected": -1.821027159690857, "loss": 2.6204, "nll_loss": 0.5986314415931702, "rewards/accuracies": 0.75, "rewards/chosen": -0.15694180130958557, "rewards/margins": 0.025160912424325943, "rewards/rejected": -0.1821027249097824, "step": 148 }, { "epoch": 0.3928806855636124, "grad_norm": 10.754402160644531, "learning_rate": 7.013309671694765e-06, "log_odds_chosen": 0.26607027649879456, "log_odds_ratio": -0.5796156525611877, "logits/chosen": -0.8944635391235352, "logits/rejected": -0.8566243052482605, "logps/chosen": -1.5501165390014648, "logps/rejected": -1.7602980136871338, "loss": 2.4804, "nll_loss": 0.5621330142021179, "rewards/accuracies": 0.75, "rewards/chosen": -0.15501166880130768, "rewards/margins": 0.021018145605921745, "rewards/rejected": -0.17602980136871338, "step": 149 }, { "epoch": 0.39551746868820037, "grad_norm": 11.098362922668457, "learning_rate": 7.0062111801242236e-06, "log_odds_chosen": 0.2638225853443146, "log_odds_ratio": -0.5734692811965942, "logits/chosen": -0.9989528656005859, "logits/rejected": -0.9287225604057312, "logps/chosen": -1.4983904361724854, "logps/rejected": -1.7102032899856567, "loss": 3.3057, "nll_loss": 0.7690660953521729, "rewards/accuracies": 1.0, "rewards/chosen": -0.14983904361724854, "rewards/margins": 0.02118128165602684, "rewards/rejected": -0.17102032899856567, "step": 150 }, { "epoch": 0.3981542518127884, "grad_norm": 9.652806282043457, "learning_rate": 6.9991126885536825e-06, "log_odds_chosen": 0.4299401640892029, "log_odds_ratio": -0.5107702016830444, "logits/chosen": -1.0003033876419067, "logits/rejected": -0.9206913113594055, "logps/chosen": -1.4160411357879639, "logps/rejected": -1.749358057975769, "loss": 2.3457, "nll_loss": 0.5353503227233887, "rewards/accuracies": 0.875, "rewards/chosen": -0.14160412549972534, "rewards/margins": 0.03333168476819992, "rewards/rejected": -0.17493581771850586, "step": 151 }, { "epoch": 0.4007910349373764, "grad_norm": 10.215150833129883, "learning_rate": 6.9920141969831405e-06, "log_odds_chosen": 0.426888108253479, "log_odds_ratio": -0.5269747376441956, "logits/chosen": -0.988075852394104, "logits/rejected": -0.8986337184906006, "logps/chosen": -1.643608570098877, "logps/rejected": -1.999243140220642, "loss": 2.8702, "nll_loss": 0.6648507118225098, "rewards/accuracies": 0.875, "rewards/chosen": -0.16436085104942322, "rewards/margins": 0.03556346148252487, "rewards/rejected": -0.1999243199825287, "step": 152 }, { "epoch": 0.4034278180619644, "grad_norm": 10.009817123413086, "learning_rate": 6.984915705412599e-06, "log_odds_chosen": 0.3125130832195282, "log_odds_ratio": -0.5523189306259155, "logits/chosen": -1.021907091140747, "logits/rejected": -0.9471596479415894, "logps/chosen": -1.5990777015686035, "logps/rejected": -1.8522807359695435, "loss": 2.6808, "nll_loss": 0.6149685978889465, "rewards/accuracies": 1.0, "rewards/chosen": -0.1599077731370926, "rewards/margins": 0.025320306420326233, "rewards/rejected": -0.18522807955741882, "step": 153 }, { "epoch": 0.4060646011865524, "grad_norm": 9.670231819152832, "learning_rate": 6.977817213842058e-06, "log_odds_chosen": 0.2577518820762634, "log_odds_ratio": -0.5812107920646667, "logits/chosen": -0.9596846103668213, "logits/rejected": -0.907728910446167, "logps/chosen": -1.556383728981018, "logps/rejected": -1.764423131942749, "loss": 2.3379, "nll_loss": 0.5263651013374329, "rewards/accuracies": 1.0, "rewards/chosen": -0.15563836693763733, "rewards/margins": 0.02080395817756653, "rewards/rejected": -0.17644232511520386, "step": 154 }, { "epoch": 0.4087013843111404, "grad_norm": 10.299683570861816, "learning_rate": 6.970718722271516e-06, "log_odds_chosen": 0.21095338463783264, "log_odds_ratio": -0.6001787185668945, "logits/chosen": -1.0265400409698486, "logits/rejected": -0.9740742444992065, "logps/chosen": -1.7002348899841309, "logps/rejected": -1.8802061080932617, "loss": 3.0064, "nll_loss": 0.6915907263755798, "rewards/accuracies": 0.75, "rewards/chosen": -0.17002347111701965, "rewards/margins": 0.017997119575738907, "rewards/rejected": -0.18802061676979065, "step": 155 }, { "epoch": 0.41133816743572843, "grad_norm": 10.699474334716797, "learning_rate": 6.963620230700976e-06, "log_odds_chosen": 0.11351797729730606, "log_odds_ratio": -0.6480950117111206, "logits/chosen": -1.0430080890655518, "logits/rejected": -0.9336438179016113, "logps/chosen": -1.5621956586837769, "logps/rejected": -1.647605299949646, "loss": 3.4295, "nll_loss": 0.7925580739974976, "rewards/accuracies": 0.75, "rewards/chosen": -0.15621955692768097, "rewards/margins": 0.008540966548025608, "rewards/rejected": -0.1647605299949646, "step": 156 }, { "epoch": 0.4139749505603164, "grad_norm": 11.156679153442383, "learning_rate": 6.956521739130434e-06, "log_odds_chosen": 0.29023119807243347, "log_odds_ratio": -0.5665775537490845, "logits/chosen": -1.0512886047363281, "logits/rejected": -0.9587866067886353, "logps/chosen": -1.603646993637085, "logps/rejected": -1.841407060623169, "loss": 3.1337, "nll_loss": 0.7267646789550781, "rewards/accuracies": 0.75, "rewards/chosen": -0.16036470234394073, "rewards/margins": 0.023776013404130936, "rewards/rejected": -0.18414071202278137, "step": 157 }, { "epoch": 0.4166117336849044, "grad_norm": 10.709914207458496, "learning_rate": 6.949423247559894e-06, "log_odds_chosen": 0.15108612179756165, "log_odds_ratio": -0.622245728969574, "logits/chosen": -0.9538211822509766, "logits/rejected": -0.9229971170425415, "logps/chosen": -1.5492205619812012, "logps/rejected": -1.666764259338379, "loss": 3.1062, "nll_loss": 0.7143333554267883, "rewards/accuracies": 0.75, "rewards/chosen": -0.15492206811904907, "rewards/margins": 0.01175436470657587, "rewards/rejected": -0.16667643189430237, "step": 158 }, { "epoch": 0.41924851680949243, "grad_norm": 10.369595527648926, "learning_rate": 6.942324755989352e-06, "log_odds_chosen": 0.32474178075790405, "log_odds_ratio": -0.5587796568870544, "logits/chosen": -1.0268337726593018, "logits/rejected": -0.9584920406341553, "logps/chosen": -1.7907629013061523, "logps/rejected": -2.0647928714752197, "loss": 2.965, "nll_loss": 0.6853820085525513, "rewards/accuracies": 0.875, "rewards/chosen": -0.17907628417015076, "rewards/margins": 0.027402998879551888, "rewards/rejected": -0.2064792811870575, "step": 159 }, { "epoch": 0.4218852999340804, "grad_norm": 11.290220260620117, "learning_rate": 6.935226264418811e-06, "log_odds_chosen": 0.07872498780488968, "log_odds_ratio": -0.6619119644165039, "logits/chosen": -0.9851783514022827, "logits/rejected": -0.9211961030960083, "logps/chosen": -1.7138417959213257, "logps/rejected": -1.7746775150299072, "loss": 3.4049, "nll_loss": 0.7850258350372314, "rewards/accuracies": 0.5, "rewards/chosen": -0.17138418555259705, "rewards/margins": 0.006083549931645393, "rewards/rejected": -0.17746774852275848, "step": 160 }, { "epoch": 0.4245220830586684, "grad_norm": 9.70108413696289, "learning_rate": 6.92812777284827e-06, "log_odds_chosen": 0.22529563307762146, "log_odds_ratio": -0.596996009349823, "logits/chosen": -1.0223166942596436, "logits/rejected": -0.9788596034049988, "logps/chosen": -1.5264146327972412, "logps/rejected": -1.7136309146881104, "loss": 2.3278, "nll_loss": 0.5222612023353577, "rewards/accuracies": 0.625, "rewards/chosen": -0.15264146029949188, "rewards/margins": 0.018721627071499825, "rewards/rejected": -0.17136308550834656, "step": 161 }, { "epoch": 0.42715886618325644, "grad_norm": 10.300216674804688, "learning_rate": 6.921029281277728e-06, "log_odds_chosen": 0.18719755113124847, "log_odds_ratio": -0.6063538193702698, "logits/chosen": -1.0139224529266357, "logits/rejected": -0.9442136883735657, "logps/chosen": -1.4809929132461548, "logps/rejected": -1.6269291639328003, "loss": 2.6508, "nll_loss": 0.602075457572937, "rewards/accuracies": 0.875, "rewards/chosen": -0.14809930324554443, "rewards/margins": 0.01459362544119358, "rewards/rejected": -0.16269291937351227, "step": 162 }, { "epoch": 0.4297956493078444, "grad_norm": 9.955241203308105, "learning_rate": 6.913930789707187e-06, "log_odds_chosen": 0.3279910087585449, "log_odds_ratio": -0.5519675016403198, "logits/chosen": -1.0312724113464355, "logits/rejected": -0.9439166784286499, "logps/chosen": -1.6244471073150635, "logps/rejected": -1.8925652503967285, "loss": 2.7268, "nll_loss": 0.6265023350715637, "rewards/accuracies": 0.875, "rewards/chosen": -0.16244471073150635, "rewards/margins": 0.026811812072992325, "rewards/rejected": -0.18925653398036957, "step": 163 }, { "epoch": 0.43243243243243246, "grad_norm": 9.78976058959961, "learning_rate": 6.906832298136646e-06, "log_odds_chosen": 0.5778919458389282, "log_odds_ratio": -0.48515623807907104, "logits/chosen": -0.9530848264694214, "logits/rejected": -0.9065274596214294, "logps/chosen": -1.5215189456939697, "logps/rejected": -2.0028321743011475, "loss": 2.6913, "nll_loss": 0.6243013143539429, "rewards/accuracies": 0.875, "rewards/chosen": -0.15215188264846802, "rewards/margins": 0.04813132435083389, "rewards/rejected": -0.2002832144498825, "step": 164 }, { "epoch": 0.43506921555702044, "grad_norm": 10.509347915649414, "learning_rate": 6.8997338065661045e-06, "log_odds_chosen": 0.1852526068687439, "log_odds_ratio": -0.619766354560852, "logits/chosen": -1.0339908599853516, "logits/rejected": -0.9722583293914795, "logps/chosen": -1.6535886526107788, "logps/rejected": -1.7956442832946777, "loss": 2.839, "nll_loss": 0.6477658152580261, "rewards/accuracies": 0.75, "rewards/chosen": -0.16535887122154236, "rewards/margins": 0.014205573126673698, "rewards/rejected": -0.1795644313097, "step": 165 }, { "epoch": 0.4377059986816084, "grad_norm": 11.17563247680664, "learning_rate": 6.8926353149955626e-06, "log_odds_chosen": 0.2836954891681671, "log_odds_ratio": -0.5793655514717102, "logits/chosen": -1.0459768772125244, "logits/rejected": -0.9338748455047607, "logps/chosen": -1.7612978219985962, "logps/rejected": -2.000579595565796, "loss": 3.2258, "nll_loss": 0.7485017776489258, "rewards/accuracies": 0.75, "rewards/chosen": -0.1761297881603241, "rewards/margins": 0.023928172886371613, "rewards/rejected": -0.2000579535961151, "step": 166 }, { "epoch": 0.44034278180619646, "grad_norm": 10.128766059875488, "learning_rate": 6.885536823425022e-06, "log_odds_chosen": 0.18964844942092896, "log_odds_ratio": -0.6131159067153931, "logits/chosen": -0.9967179298400879, "logits/rejected": -0.9552919864654541, "logps/chosen": -1.8183844089508057, "logps/rejected": -1.985978126525879, "loss": 2.9409, "nll_loss": 0.6739104390144348, "rewards/accuracies": 0.625, "rewards/chosen": -0.18183845281600952, "rewards/margins": 0.016759376972913742, "rewards/rejected": -0.19859781861305237, "step": 167 }, { "epoch": 0.44297956493078444, "grad_norm": 9.803478240966797, "learning_rate": 6.87843833185448e-06, "log_odds_chosen": 0.35690954327583313, "log_odds_ratio": -0.5366029143333435, "logits/chosen": -1.0304672718048096, "logits/rejected": -0.960822582244873, "logps/chosen": -1.4720278978347778, "logps/rejected": -1.7538082599639893, "loss": 2.5161, "nll_loss": 0.5753771662712097, "rewards/accuracies": 1.0, "rewards/chosen": -0.14720278978347778, "rewards/margins": 0.028178047388792038, "rewards/rejected": -0.1753808557987213, "step": 168 }, { "epoch": 0.4456163480553724, "grad_norm": 10.489614486694336, "learning_rate": 6.87133984028394e-06, "log_odds_chosen": 0.29822301864624023, "log_odds_ratio": -0.5612882375717163, "logits/chosen": -0.9769358038902283, "logits/rejected": -0.9301931262016296, "logps/chosen": -1.4727394580841064, "logps/rejected": -1.703880786895752, "loss": 2.2927, "nll_loss": 0.5170445442199707, "rewards/accuracies": 0.875, "rewards/chosen": -0.1472739577293396, "rewards/margins": 0.023114126175642014, "rewards/rejected": -0.17038807272911072, "step": 169 }, { "epoch": 0.44825313117996046, "grad_norm": 9.719270706176758, "learning_rate": 6.864241348713398e-06, "log_odds_chosen": 0.1573425531387329, "log_odds_ratio": -0.6409673094749451, "logits/chosen": -0.965396523475647, "logits/rejected": -0.9352325201034546, "logps/chosen": -1.5652350187301636, "logps/rejected": -1.6846939325332642, "loss": 2.1732, "nll_loss": 0.47920310497283936, "rewards/accuracies": 0.75, "rewards/chosen": -0.15652349591255188, "rewards/margins": 0.011945885606110096, "rewards/rejected": -0.1684693992137909, "step": 170 }, { "epoch": 0.45088991430454844, "grad_norm": 10.072135925292969, "learning_rate": 6.857142857142856e-06, "log_odds_chosen": 0.2361280769109726, "log_odds_ratio": -0.5988917350769043, "logits/chosen": -1.0373730659484863, "logits/rejected": -0.97651207447052, "logps/chosen": -1.563853144645691, "logps/rejected": -1.746325135231018, "loss": 2.8339, "nll_loss": 0.6485767364501953, "rewards/accuracies": 0.75, "rewards/chosen": -0.15638533234596252, "rewards/margins": 0.018247190862894058, "rewards/rejected": -0.17463251948356628, "step": 171 }, { "epoch": 0.4535266974291364, "grad_norm": 9.880722999572754, "learning_rate": 6.850044365572316e-06, "log_odds_chosen": 0.42882829904556274, "log_odds_ratio": -0.5146551728248596, "logits/chosen": -0.9842818379402161, "logits/rejected": -0.956126868724823, "logps/chosen": -1.4900033473968506, "logps/rejected": -1.831408143043518, "loss": 2.1344, "nll_loss": 0.4821299910545349, "rewards/accuracies": 0.875, "rewards/chosen": -0.149000346660614, "rewards/margins": 0.03414047881960869, "rewards/rejected": -0.1831408143043518, "step": 172 }, { "epoch": 0.45616348055372447, "grad_norm": 9.939020156860352, "learning_rate": 6.842945874001774e-06, "log_odds_chosen": 0.4484916627407074, "log_odds_ratio": -0.5012741088867188, "logits/chosen": -1.0212557315826416, "logits/rejected": -0.894973874092102, "logps/chosen": -1.4865288734436035, "logps/rejected": -1.8439892530441284, "loss": 2.3634, "nll_loss": 0.5407203435897827, "rewards/accuracies": 1.0, "rewards/chosen": -0.14865288138389587, "rewards/margins": 0.03574604168534279, "rewards/rejected": -0.18439891934394836, "step": 173 }, { "epoch": 0.45880026367831245, "grad_norm": 9.039196968078613, "learning_rate": 6.835847382431233e-06, "log_odds_chosen": 0.24636773765087128, "log_odds_ratio": -0.5889366865158081, "logits/chosen": -0.9732996225357056, "logits/rejected": -0.9060419797897339, "logps/chosen": -1.3600660562515259, "logps/rejected": -1.5369794368743896, "loss": 2.1096, "nll_loss": 0.4685070514678955, "rewards/accuracies": 0.75, "rewards/chosen": -0.13600660860538483, "rewards/margins": 0.017691336572170258, "rewards/rejected": -0.1536979377269745, "step": 174 }, { "epoch": 0.4614370468029005, "grad_norm": 11.821978569030762, "learning_rate": 6.828748890860692e-06, "log_odds_chosen": 0.28139597177505493, "log_odds_ratio": -0.5732556581497192, "logits/chosen": -0.9052269458770752, "logits/rejected": -0.867675244808197, "logps/chosen": -1.664025902748108, "logps/rejected": -1.8929924964904785, "loss": 3.2741, "nll_loss": 0.7611905336380005, "rewards/accuracies": 0.75, "rewards/chosen": -0.16640257835388184, "rewards/margins": 0.022896669805049896, "rewards/rejected": -0.18929925560951233, "step": 175 }, { "epoch": 0.46407382992748847, "grad_norm": 10.669132232666016, "learning_rate": 6.821650399290151e-06, "log_odds_chosen": 0.2710084021091461, "log_odds_ratio": -0.5712127685546875, "logits/chosen": -0.9989784359931946, "logits/rejected": -0.9607273936271667, "logps/chosen": -1.7668498754501343, "logps/rejected": -1.9909987449645996, "loss": 3.1341, "nll_loss": 0.7264118790626526, "rewards/accuracies": 0.875, "rewards/chosen": -0.17668499052524567, "rewards/margins": 0.022414876148104668, "rewards/rejected": -0.19909986853599548, "step": 176 }, { "epoch": 0.46671061305207645, "grad_norm": 11.005374908447266, "learning_rate": 6.814551907719609e-06, "log_odds_chosen": 0.1589566171169281, "log_odds_ratio": -0.6214722394943237, "logits/chosen": -0.9853265285491943, "logits/rejected": -0.9289055466651917, "logps/chosen": -1.5828139781951904, "logps/rejected": -1.7081506252288818, "loss": 2.9991, "nll_loss": 0.6876333951950073, "rewards/accuracies": 0.875, "rewards/chosen": -0.1582813858985901, "rewards/margins": 0.012533656321465969, "rewards/rejected": -0.17081505060195923, "step": 177 }, { "epoch": 0.4693473961766645, "grad_norm": 10.49986743927002, "learning_rate": 6.8074534161490685e-06, "log_odds_chosen": 0.36675703525543213, "log_odds_ratio": -0.5418417453765869, "logits/chosen": -1.038415789604187, "logits/rejected": -0.9667816162109375, "logps/chosen": -1.541959285736084, "logps/rejected": -1.8625450134277344, "loss": 2.6794, "nll_loss": 0.6156629920005798, "rewards/accuracies": 0.875, "rewards/chosen": -0.15419591963291168, "rewards/margins": 0.032058581709861755, "rewards/rejected": -0.18625450134277344, "step": 178 }, { "epoch": 0.47198417930125247, "grad_norm": 9.203581809997559, "learning_rate": 6.800354924578527e-06, "log_odds_chosen": 0.17328761518001556, "log_odds_ratio": -0.6320229768753052, "logits/chosen": -0.9987534284591675, "logits/rejected": -0.9479336738586426, "logps/chosen": -1.4706135988235474, "logps/rejected": -1.6253565549850464, "loss": 2.2421, "nll_loss": 0.49731987714767456, "rewards/accuracies": 0.75, "rewards/chosen": -0.14706136286258698, "rewards/margins": 0.01547430083155632, "rewards/rejected": -0.1625356525182724, "step": 179 }, { "epoch": 0.47462096242584045, "grad_norm": 10.125992774963379, "learning_rate": 6.7932564330079855e-06, "log_odds_chosen": 0.26374509930610657, "log_odds_ratio": -0.5806645154953003, "logits/chosen": -1.076350212097168, "logits/rejected": -1.0171654224395752, "logps/chosen": -1.6135175228118896, "logps/rejected": -1.8273677825927734, "loss": 3.3221, "nll_loss": 0.7724688053131104, "rewards/accuracies": 0.875, "rewards/chosen": -0.1613517552614212, "rewards/margins": 0.021385014057159424, "rewards/rejected": -0.18273678421974182, "step": 180 }, { "epoch": 0.4772577455504285, "grad_norm": 10.200504302978516, "learning_rate": 6.786157941437444e-06, "log_odds_chosen": 0.2539365589618683, "log_odds_ratio": -0.5794248580932617, "logits/chosen": -0.9939213991165161, "logits/rejected": -0.9339665174484253, "logps/chosen": -1.4931640625, "logps/rejected": -1.6908729076385498, "loss": 2.5199, "nll_loss": 0.5720276236534119, "rewards/accuracies": 0.875, "rewards/chosen": -0.14931640028953552, "rewards/margins": 0.019770905375480652, "rewards/rejected": -0.16908732056617737, "step": 181 }, { "epoch": 0.4798945286750165, "grad_norm": 10.58282470703125, "learning_rate": 6.7790594498669024e-06, "log_odds_chosen": 0.21669375896453857, "log_odds_ratio": -0.5990947484970093, "logits/chosen": -1.0061519145965576, "logits/rejected": -0.9448709487915039, "logps/chosen": -1.753849744796753, "logps/rejected": -1.9302830696105957, "loss": 3.382, "nll_loss": 0.78557950258255, "rewards/accuracies": 0.75, "rewards/chosen": -0.175384983420372, "rewards/margins": 0.017643319442868233, "rewards/rejected": -0.1930283159017563, "step": 182 }, { "epoch": 0.48253131179960446, "grad_norm": 10.520997047424316, "learning_rate": 6.771960958296362e-06, "log_odds_chosen": 0.19941487908363342, "log_odds_ratio": -0.6015852689743042, "logits/chosen": -1.1057069301605225, "logits/rejected": -0.9830414056777954, "logps/chosen": -1.5218502283096313, "logps/rejected": -1.6809983253479004, "loss": 3.1066, "nll_loss": 0.7164870500564575, "rewards/accuracies": 1.0, "rewards/chosen": -0.15218502283096313, "rewards/margins": 0.015914827585220337, "rewards/rejected": -0.16809985041618347, "step": 183 }, { "epoch": 0.4851680949241925, "grad_norm": 10.83430004119873, "learning_rate": 6.76486246672582e-06, "log_odds_chosen": 0.21703600883483887, "log_odds_ratio": -0.592914879322052, "logits/chosen": -1.0694499015808105, "logits/rejected": -1.0465267896652222, "logps/chosen": -1.4693856239318848, "logps/rejected": -1.6387711763381958, "loss": 3.0489, "nll_loss": 0.7029221653938293, "rewards/accuracies": 1.0, "rewards/chosen": -0.14693856239318848, "rewards/margins": 0.016938570886850357, "rewards/rejected": -0.16387712955474854, "step": 184 }, { "epoch": 0.4878048780487805, "grad_norm": 9.578657150268555, "learning_rate": 6.757763975155279e-06, "log_odds_chosen": 0.15174290537834167, "log_odds_ratio": -0.6242505311965942, "logits/chosen": -1.0101323127746582, "logits/rejected": -0.9744507074356079, "logps/chosen": -1.3636577129364014, "logps/rejected": -1.4776711463928223, "loss": 2.3882, "nll_loss": 0.5346183180809021, "rewards/accuracies": 0.75, "rewards/chosen": -0.13636577129364014, "rewards/margins": 0.011401347815990448, "rewards/rejected": -0.14776712656021118, "step": 185 }, { "epoch": 0.4904416611733685, "grad_norm": 11.106609344482422, "learning_rate": 6.750665483584738e-06, "log_odds_chosen": 0.2596626579761505, "log_odds_ratio": -0.5743707418441772, "logits/chosen": -1.0113422870635986, "logits/rejected": -0.9298158288002014, "logps/chosen": -1.6257975101470947, "logps/rejected": -1.8388350009918213, "loss": 2.7506, "nll_loss": 0.6302106976509094, "rewards/accuracies": 0.875, "rewards/chosen": -0.162579745054245, "rewards/margins": 0.021303754299879074, "rewards/rejected": -0.18388350307941437, "step": 186 }, { "epoch": 0.4930784442979565, "grad_norm": 9.922343254089355, "learning_rate": 6.743566992014197e-06, "log_odds_chosen": 0.3453751802444458, "log_odds_ratio": -0.5418750047683716, "logits/chosen": -1.022857427597046, "logits/rejected": -0.9566177129745483, "logps/chosen": -1.563047170639038, "logps/rejected": -1.8398334980010986, "loss": 2.6673, "nll_loss": 0.6126459836959839, "rewards/accuracies": 0.875, "rewards/chosen": -0.1563047170639038, "rewards/margins": 0.027678625658154488, "rewards/rejected": -0.18398335576057434, "step": 187 }, { "epoch": 0.4957152274225445, "grad_norm": 10.53550910949707, "learning_rate": 6.736468500443656e-06, "log_odds_chosen": 0.3632819652557373, "log_odds_ratio": -0.5351804494857788, "logits/chosen": -0.975321352481842, "logits/rejected": -0.9194474816322327, "logps/chosen": -1.7827264070510864, "logps/rejected": -2.0951194763183594, "loss": 2.7061, "nll_loss": 0.6229962110519409, "rewards/accuracies": 0.875, "rewards/chosen": -0.17827263474464417, "rewards/margins": 0.031239299103617668, "rewards/rejected": -0.20951193571090698, "step": 188 }, { "epoch": 0.4983520105471325, "grad_norm": 10.74094009399414, "learning_rate": 6.729370008873114e-06, "log_odds_chosen": 0.23406413197517395, "log_odds_ratio": -0.591535747051239, "logits/chosen": -0.997490644454956, "logits/rejected": -0.8993905782699585, "logps/chosen": -1.6242046356201172, "logps/rejected": -1.8176889419555664, "loss": 2.9148, "nll_loss": 0.6695369482040405, "rewards/accuracies": 0.75, "rewards/chosen": -0.16242045164108276, "rewards/margins": 0.019348448142409325, "rewards/rejected": -0.18176892399787903, "step": 189 }, { "epoch": 0.5009887936717206, "grad_norm": 10.881436347961426, "learning_rate": 6.722271517302573e-06, "log_odds_chosen": 0.10114425420761108, "log_odds_ratio": -0.6511253714561462, "logits/chosen": -1.0813939571380615, "logits/rejected": -1.008556842803955, "logps/chosen": -1.4924644231796265, "logps/rejected": -1.5765280723571777, "loss": 3.0701, "nll_loss": 0.702400267124176, "rewards/accuracies": 0.5, "rewards/chosen": -0.1492464542388916, "rewards/margins": 0.008406376466155052, "rewards/rejected": -0.1576528251171112, "step": 190 }, { "epoch": 0.5036255767963085, "grad_norm": 10.459433555603027, "learning_rate": 6.715173025732032e-06, "log_odds_chosen": 0.3647010624408722, "log_odds_ratio": -0.535969078540802, "logits/chosen": -0.9934642314910889, "logits/rejected": -0.9312925338745117, "logps/chosen": -1.6314611434936523, "logps/rejected": -1.9391553401947021, "loss": 2.6815, "nll_loss": 0.6167663931846619, "rewards/accuracies": 1.0, "rewards/chosen": -0.16314613819122314, "rewards/margins": 0.030769426375627518, "rewards/rejected": -0.19391554594039917, "step": 191 }, { "epoch": 0.5062623599208965, "grad_norm": 10.44395637512207, "learning_rate": 6.708074534161491e-06, "log_odds_chosen": 0.3406563997268677, "log_odds_ratio": -0.5445241928100586, "logits/chosen": -1.0486533641815186, "logits/rejected": -0.9384101629257202, "logps/chosen": -1.4763271808624268, "logps/rejected": -1.745421051979065, "loss": 3.3578, "nll_loss": 0.7850080132484436, "rewards/accuracies": 1.0, "rewards/chosen": -0.14763271808624268, "rewards/margins": 0.02690940722823143, "rewards/rejected": -0.1745421141386032, "step": 192 }, { "epoch": 0.5088991430454846, "grad_norm": 10.408292770385742, "learning_rate": 6.700976042590949e-06, "log_odds_chosen": 0.3092145323753357, "log_odds_ratio": -0.5610625743865967, "logits/chosen": -1.0052587985992432, "logits/rejected": -0.962735652923584, "logps/chosen": -1.5190422534942627, "logps/rejected": -1.7629897594451904, "loss": 2.7384, "nll_loss": 0.6284852027893066, "rewards/accuracies": 0.875, "rewards/chosen": -0.15190422534942627, "rewards/margins": 0.024394752457737923, "rewards/rejected": -0.17629897594451904, "step": 193 }, { "epoch": 0.5115359261700725, "grad_norm": 9.567964553833008, "learning_rate": 6.693877551020408e-06, "log_odds_chosen": 0.22837528586387634, "log_odds_ratio": -0.591896116733551, "logits/chosen": -1.0158699750900269, "logits/rejected": -0.9633442759513855, "logps/chosen": -1.5977767705917358, "logps/rejected": -1.7741581201553345, "loss": 2.7471, "nll_loss": 0.6275816559791565, "rewards/accuracies": 0.75, "rewards/chosen": -0.1597776710987091, "rewards/margins": 0.01763814315199852, "rewards/rejected": -0.17741578817367554, "step": 194 }, { "epoch": 0.5141727092946605, "grad_norm": 9.766575813293457, "learning_rate": 6.6867790594498664e-06, "log_odds_chosen": 0.3533956706523895, "log_odds_ratio": -0.5434699058532715, "logits/chosen": -1.0098570585250854, "logits/rejected": -0.9270682334899902, "logps/chosen": -1.5814838409423828, "logps/rejected": -1.865525722503662, "loss": 2.5237, "nll_loss": 0.5765711665153503, "rewards/accuracies": 0.875, "rewards/chosen": -0.1581483781337738, "rewards/margins": 0.028404179960489273, "rewards/rejected": -0.18655255436897278, "step": 195 }, { "epoch": 0.5168094924192486, "grad_norm": 10.196769714355469, "learning_rate": 6.679680567879326e-06, "log_odds_chosen": 0.28227320313453674, "log_odds_ratio": -0.5764878988265991, "logits/chosen": -0.968219518661499, "logits/rejected": -0.9135010242462158, "logps/chosen": -1.6530375480651855, "logps/rejected": -1.8860328197479248, "loss": 2.6832, "nll_loss": 0.6131478548049927, "rewards/accuracies": 0.75, "rewards/chosen": -0.1653037667274475, "rewards/margins": 0.023299511522054672, "rewards/rejected": -0.18860328197479248, "step": 196 }, { "epoch": 0.5194462755438365, "grad_norm": 10.448760032653809, "learning_rate": 6.672582076308784e-06, "log_odds_chosen": 0.5269650816917419, "log_odds_ratio": -0.47726505994796753, "logits/chosen": -0.9919060468673706, "logits/rejected": -0.917604923248291, "logps/chosen": -1.4839609861373901, "logps/rejected": -1.910081386566162, "loss": 2.7142, "nll_loss": 0.6308342814445496, "rewards/accuracies": 1.0, "rewards/chosen": -0.1483961045742035, "rewards/margins": 0.042612046003341675, "rewards/rejected": -0.19100815057754517, "step": 197 }, { "epoch": 0.5220830586684245, "grad_norm": 10.519347190856934, "learning_rate": 6.665483584738242e-06, "log_odds_chosen": 0.38204941153526306, "log_odds_ratio": -0.5332264304161072, "logits/chosen": -1.0032997131347656, "logits/rejected": -0.9247130155563354, "logps/chosen": -1.761028528213501, "logps/rejected": -2.0825135707855225, "loss": 3.2928, "nll_loss": 0.7698801159858704, "rewards/accuracies": 0.875, "rewards/chosen": -0.17610284686088562, "rewards/margins": 0.03214849531650543, "rewards/rejected": -0.20825135707855225, "step": 198 }, { "epoch": 0.5247198417930126, "grad_norm": 9.883003234863281, "learning_rate": 6.658385093167702e-06, "log_odds_chosen": 0.7173312306404114, "log_odds_ratio": -0.44229722023010254, "logits/chosen": -1.0049068927764893, "logits/rejected": -0.9245249032974243, "logps/chosen": -1.4914054870605469, "logps/rejected": -2.0990054607391357, "loss": 2.6475, "nll_loss": 0.6176378130912781, "rewards/accuracies": 1.0, "rewards/chosen": -0.14914055168628693, "rewards/margins": 0.06075998395681381, "rewards/rejected": -0.20990052819252014, "step": 199 }, { "epoch": 0.5273566249176005, "grad_norm": 9.658519744873047, "learning_rate": 6.65128660159716e-06, "log_odds_chosen": 0.30500340461730957, "log_odds_ratio": -0.5833555459976196, "logits/chosen": -1.0268594026565552, "logits/rejected": -0.938892126083374, "logps/chosen": -1.7157214879989624, "logps/rejected": -1.963449239730835, "loss": 2.9324, "nll_loss": 0.6747677326202393, "rewards/accuracies": 0.625, "rewards/chosen": -0.17157214879989624, "rewards/margins": 0.024772752076387405, "rewards/rejected": -0.19634489715099335, "step": 200 }, { "epoch": 0.5299934080421885, "grad_norm": 10.24622631072998, "learning_rate": 6.644188110026619e-06, "log_odds_chosen": 0.5697795748710632, "log_odds_ratio": -0.45827436447143555, "logits/chosen": -1.0016233921051025, "logits/rejected": -0.8836263418197632, "logps/chosen": -1.4620568752288818, "logps/rejected": -1.9116840362548828, "loss": 2.6193, "nll_loss": 0.6090010404586792, "rewards/accuracies": 1.0, "rewards/chosen": -0.14620567858219147, "rewards/margins": 0.044962719082832336, "rewards/rejected": -0.1911683976650238, "step": 201 }, { "epoch": 0.5326301911667766, "grad_norm": 9.450702667236328, "learning_rate": 6.637089618456078e-06, "log_odds_chosen": 0.20333410799503326, "log_odds_ratio": -0.6024256944656372, "logits/chosen": -0.940944492816925, "logits/rejected": -0.8981947898864746, "logps/chosen": -1.5878612995147705, "logps/rejected": -1.7429168224334717, "loss": 2.141, "nll_loss": 0.4750024378299713, "rewards/accuracies": 0.875, "rewards/chosen": -0.15878614783287048, "rewards/margins": 0.015505557879805565, "rewards/rejected": -0.1742917001247406, "step": 202 }, { "epoch": 0.5352669742913645, "grad_norm": 10.130104064941406, "learning_rate": 6.629991126885537e-06, "log_odds_chosen": 0.4129548966884613, "log_odds_ratio": -0.5138856768608093, "logits/chosen": -1.0429551601409912, "logits/rejected": -0.9169206619262695, "logps/chosen": -1.3947268724441528, "logps/rejected": -1.723283290863037, "loss": 2.6488, "nll_loss": 0.6108206510543823, "rewards/accuracies": 1.0, "rewards/chosen": -0.13947269320487976, "rewards/margins": 0.03285562992095947, "rewards/rejected": -0.17232832312583923, "step": 203 }, { "epoch": 0.5379037574159525, "grad_norm": 10.768418312072754, "learning_rate": 6.622892635314995e-06, "log_odds_chosen": 0.2619969844818115, "log_odds_ratio": -0.5781389474868774, "logits/chosen": -1.0080143213272095, "logits/rejected": -0.888601541519165, "logps/chosen": -1.6371673345565796, "logps/rejected": -1.8551280498504639, "loss": 3.149, "nll_loss": 0.7294327020645142, "rewards/accuracies": 0.875, "rewards/chosen": -0.16371673345565796, "rewards/margins": 0.021796071901917458, "rewards/rejected": -0.18551281094551086, "step": 204 }, { "epoch": 0.5405405405405406, "grad_norm": 9.554441452026367, "learning_rate": 6.615794143744455e-06, "log_odds_chosen": 0.24392247200012207, "log_odds_ratio": -0.5857117176055908, "logits/chosen": -0.9610693454742432, "logits/rejected": -0.9121558666229248, "logps/chosen": -1.5005481243133545, "logps/rejected": -1.6922739744186401, "loss": 2.1473, "nll_loss": 0.4782421886920929, "rewards/accuracies": 0.875, "rewards/chosen": -0.15005481243133545, "rewards/margins": 0.01917259395122528, "rewards/rejected": -0.16922740638256073, "step": 205 }, { "epoch": 0.5431773236651285, "grad_norm": 9.785226821899414, "learning_rate": 6.608695652173913e-06, "log_odds_chosen": 0.6710850596427917, "log_odds_ratio": -0.43129992485046387, "logits/chosen": -0.9875367283821106, "logits/rejected": -0.9106200933456421, "logps/chosen": -1.3776631355285645, "logps/rejected": -1.898719072341919, "loss": 2.1726, "nll_loss": 0.50001060962677, "rewards/accuracies": 1.0, "rewards/chosen": -0.13776631653308868, "rewards/margins": 0.05210559815168381, "rewards/rejected": -0.1898719072341919, "step": 206 }, { "epoch": 0.5458141067897165, "grad_norm": 10.392133712768555, "learning_rate": 6.6015971606033715e-06, "log_odds_chosen": 0.3169119656085968, "log_odds_ratio": -0.5607678294181824, "logits/chosen": -1.047536015510559, "logits/rejected": -0.9540445804595947, "logps/chosen": -1.5951440334320068, "logps/rejected": -1.8588143587112427, "loss": 2.6913, "nll_loss": 0.6167535781860352, "rewards/accuracies": 0.75, "rewards/chosen": -0.1595143973827362, "rewards/margins": 0.026367036625742912, "rewards/rejected": -0.18588143587112427, "step": 207 }, { "epoch": 0.5484508899143046, "grad_norm": 10.140395164489746, "learning_rate": 6.5944986690328304e-06, "log_odds_chosen": 0.33773016929626465, "log_odds_ratio": -0.5480486154556274, "logits/chosen": -1.039228916168213, "logits/rejected": -0.966877281665802, "logps/chosen": -1.5897202491760254, "logps/rejected": -1.866443157196045, "loss": 2.878, "nll_loss": 0.6646842956542969, "rewards/accuracies": 0.875, "rewards/chosen": -0.15897202491760254, "rewards/margins": 0.027672285214066505, "rewards/rejected": -0.1866443157196045, "step": 208 }, { "epoch": 0.5510876730388925, "grad_norm": 9.475105285644531, "learning_rate": 6.5874001774622885e-06, "log_odds_chosen": 0.27345022559165955, "log_odds_ratio": -0.5832695364952087, "logits/chosen": -1.0278539657592773, "logits/rejected": -0.9749932885169983, "logps/chosen": -1.287369966506958, "logps/rejected": -1.5093766450881958, "loss": 2.2055, "nll_loss": 0.4930441975593567, "rewards/accuracies": 0.75, "rewards/chosen": -0.12873700261116028, "rewards/margins": 0.022200649604201317, "rewards/rejected": -0.15093766152858734, "step": 209 }, { "epoch": 0.5537244561634805, "grad_norm": 9.57432746887207, "learning_rate": 6.580301685891748e-06, "log_odds_chosen": 0.3678041100502014, "log_odds_ratio": -0.5408501029014587, "logits/chosen": -1.0283927917480469, "logits/rejected": -0.9835280179977417, "logps/chosen": -1.4167461395263672, "logps/rejected": -1.708855152130127, "loss": 2.3869, "nll_loss": 0.5426478385925293, "rewards/accuracies": 0.875, "rewards/chosen": -0.14167462289333344, "rewards/margins": 0.029210904613137245, "rewards/rejected": -0.17088551819324493, "step": 210 }, { "epoch": 0.5563612392880686, "grad_norm": 10.188741683959961, "learning_rate": 6.573203194321206e-06, "log_odds_chosen": 0.3999137878417969, "log_odds_ratio": -0.5197833776473999, "logits/chosen": -0.9907450675964355, "logits/rejected": -0.9520745277404785, "logps/chosen": -1.543394923210144, "logps/rejected": -1.870898962020874, "loss": 2.4805, "nll_loss": 0.5681428909301758, "rewards/accuracies": 1.0, "rewards/chosen": -0.1543394923210144, "rewards/margins": 0.03275042027235031, "rewards/rejected": -0.1870899200439453, "step": 211 }, { "epoch": 0.5589980224126566, "grad_norm": 10.253950119018555, "learning_rate": 6.566104702750665e-06, "log_odds_chosen": 0.3432654142379761, "log_odds_ratio": -0.5483474731445312, "logits/chosen": -1.0167362689971924, "logits/rejected": -0.9465508460998535, "logps/chosen": -1.4691851139068604, "logps/rejected": -1.7421796321868896, "loss": 2.5392, "nll_loss": 0.5799700021743774, "rewards/accuracies": 0.875, "rewards/chosen": -0.14691850543022156, "rewards/margins": 0.027299458160996437, "rewards/rejected": -0.17421796917915344, "step": 212 }, { "epoch": 0.5616348055372445, "grad_norm": 9.691521644592285, "learning_rate": 6.559006211180124e-06, "log_odds_chosen": 0.2519860863685608, "log_odds_ratio": -0.5869906544685364, "logits/chosen": -0.9782098531723022, "logits/rejected": -0.9147440791130066, "logps/chosen": -1.5072137117385864, "logps/rejected": -1.727609634399414, "loss": 2.343, "nll_loss": 0.5270425081253052, "rewards/accuracies": 0.875, "rewards/chosen": -0.15072137117385864, "rewards/margins": 0.02203957922756672, "rewards/rejected": -0.1727609634399414, "step": 213 }, { "epoch": 0.5642715886618326, "grad_norm": 9.183805465698242, "learning_rate": 6.551907719609583e-06, "log_odds_chosen": 0.4426887035369873, "log_odds_ratio": -0.5064529776573181, "logits/chosen": -0.969307541847229, "logits/rejected": -0.8896834254264832, "logps/chosen": -1.4110374450683594, "logps/rejected": -1.7519762516021729, "loss": 1.8554, "nll_loss": 0.4132058620452881, "rewards/accuracies": 1.0, "rewards/chosen": -0.14110374450683594, "rewards/margins": 0.03409387543797493, "rewards/rejected": -0.17519763112068176, "step": 214 }, { "epoch": 0.5669083717864206, "grad_norm": 10.12009048461914, "learning_rate": 6.544809228039041e-06, "log_odds_chosen": 0.3893926739692688, "log_odds_ratio": -0.5226792097091675, "logits/chosen": -1.0572798252105713, "logits/rejected": -0.9570890665054321, "logps/chosen": -1.468374490737915, "logps/rejected": -1.7834434509277344, "loss": 2.8241, "nll_loss": 0.6537682414054871, "rewards/accuracies": 1.0, "rewards/chosen": -0.14683745801448822, "rewards/margins": 0.031506896018981934, "rewards/rejected": -0.17834435403347015, "step": 215 }, { "epoch": 0.5695451549110085, "grad_norm": 10.187172889709473, "learning_rate": 6.5377107364685e-06, "log_odds_chosen": 0.41759955883026123, "log_odds_ratio": -0.510657787322998, "logits/chosen": -1.046910285949707, "logits/rejected": -0.9419330358505249, "logps/chosen": -1.576021432876587, "logps/rejected": -1.9195055961608887, "loss": 2.7863, "nll_loss": 0.6455209851264954, "rewards/accuracies": 1.0, "rewards/chosen": -0.15760213136672974, "rewards/margins": 0.03434841334819794, "rewards/rejected": -0.19195055961608887, "step": 216 }, { "epoch": 0.5721819380355966, "grad_norm": 10.267085075378418, "learning_rate": 6.530612244897959e-06, "log_odds_chosen": 0.2733405530452728, "log_odds_ratio": -0.56819748878479, "logits/chosen": -1.02101731300354, "logits/rejected": -0.9294899702072144, "logps/chosen": -1.7061502933502197, "logps/rejected": -1.9277353286743164, "loss": 2.8517, "nll_loss": 0.6561131477355957, "rewards/accuracies": 1.0, "rewards/chosen": -0.1706150472164154, "rewards/margins": 0.022158479318022728, "rewards/rejected": -0.1927735060453415, "step": 217 }, { "epoch": 0.5748187211601846, "grad_norm": 9.67136287689209, "learning_rate": 6.523513753327418e-06, "log_odds_chosen": 0.5332562923431396, "log_odds_ratio": -0.468158483505249, "logits/chosen": -1.0288969278335571, "logits/rejected": -0.9056495428085327, "logps/chosen": -1.6826705932617188, "logps/rejected": -2.130521774291992, "loss": 2.7361, "nll_loss": 0.6372010707855225, "rewards/accuracies": 1.0, "rewards/chosen": -0.16826705634593964, "rewards/margins": 0.04478512704372406, "rewards/rejected": -0.2130521833896637, "step": 218 }, { "epoch": 0.5774555042847725, "grad_norm": 9.922078132629395, "learning_rate": 6.516415261756877e-06, "log_odds_chosen": 0.2611735165119171, "log_odds_ratio": -0.5755319595336914, "logits/chosen": -1.0483546257019043, "logits/rejected": -0.9757335186004639, "logps/chosen": -1.6353774070739746, "logps/rejected": -1.8514364957809448, "loss": 2.7073, "nll_loss": 0.6192625164985657, "rewards/accuracies": 1.0, "rewards/chosen": -0.16353774070739746, "rewards/margins": 0.02160591259598732, "rewards/rejected": -0.18514364957809448, "step": 219 }, { "epoch": 0.5800922874093606, "grad_norm": 10.130531311035156, "learning_rate": 6.509316770186335e-06, "log_odds_chosen": 0.4569411873817444, "log_odds_ratio": -0.5014762878417969, "logits/chosen": -1.0507677793502808, "logits/rejected": -1.014943242073059, "logps/chosen": -1.3462841510772705, "logps/rejected": -1.6764225959777832, "loss": 2.678, "nll_loss": 0.6193474531173706, "rewards/accuracies": 1.0, "rewards/chosen": -0.13462841510772705, "rewards/margins": 0.03301384299993515, "rewards/rejected": -0.1676422655582428, "step": 220 }, { "epoch": 0.5827290705339486, "grad_norm": 9.818591117858887, "learning_rate": 6.5022182786157944e-06, "log_odds_chosen": 0.33437085151672363, "log_odds_ratio": -0.5524057149887085, "logits/chosen": -1.0554800033569336, "logits/rejected": -0.9858191609382629, "logps/chosen": -1.5950689315795898, "logps/rejected": -1.8567230701446533, "loss": 3.0333, "nll_loss": 0.7030817270278931, "rewards/accuracies": 0.875, "rewards/chosen": -0.1595069169998169, "rewards/margins": 0.026165399700403214, "rewards/rejected": -0.1856723129749298, "step": 221 }, { "epoch": 0.5853658536585366, "grad_norm": 9.592183113098145, "learning_rate": 6.4951197870452525e-06, "log_odds_chosen": 0.3747381865978241, "log_odds_ratio": -0.5308906435966492, "logits/chosen": -1.023972988128662, "logits/rejected": -0.9763661623001099, "logps/chosen": -1.470754861831665, "logps/rejected": -1.762098789215088, "loss": 2.3964, "nll_loss": 0.5460025072097778, "rewards/accuracies": 1.0, "rewards/chosen": -0.14707550406455994, "rewards/margins": 0.02913437783718109, "rewards/rejected": -0.17620986700057983, "step": 222 }, { "epoch": 0.5880026367831246, "grad_norm": 10.06408977508545, "learning_rate": 6.4880212954747105e-06, "log_odds_chosen": 0.1953536868095398, "log_odds_ratio": -0.6110211610794067, "logits/chosen": -0.9953948259353638, "logits/rejected": -0.9426294565200806, "logps/chosen": -1.5649466514587402, "logps/rejected": -1.721879243850708, "loss": 2.5598, "nll_loss": 0.5788469910621643, "rewards/accuracies": 0.75, "rewards/chosen": -0.15649467706680298, "rewards/margins": 0.015693247318267822, "rewards/rejected": -0.1721879243850708, "step": 223 }, { "epoch": 0.5906394199077126, "grad_norm": 10.529962539672852, "learning_rate": 6.48092280390417e-06, "log_odds_chosen": 0.22896939516067505, "log_odds_ratio": -0.5931162238121033, "logits/chosen": -0.9920583963394165, "logits/rejected": -0.9440720677375793, "logps/chosen": -1.6963231563568115, "logps/rejected": -1.8891266584396362, "loss": 2.6771, "nll_loss": 0.6099545955657959, "rewards/accuracies": 0.875, "rewards/chosen": -0.16963233053684235, "rewards/margins": 0.019280334934592247, "rewards/rejected": -0.18891265988349915, "step": 224 }, { "epoch": 0.5932762030323006, "grad_norm": 9.350930213928223, "learning_rate": 6.473824312333628e-06, "log_odds_chosen": 0.46141666173934937, "log_odds_ratio": -0.4986426830291748, "logits/chosen": -0.9788612127304077, "logits/rejected": -0.9355888962745667, "logps/chosen": -1.3900244235992432, "logps/rejected": -1.739728569984436, "loss": 2.1033, "nll_loss": 0.47596830129623413, "rewards/accuracies": 0.875, "rewards/chosen": -0.13900244235992432, "rewards/margins": 0.034970417618751526, "rewards/rejected": -0.17397285997867584, "step": 225 }, { "epoch": 0.5959129861568886, "grad_norm": 9.999725341796875, "learning_rate": 6.466725820763088e-06, "log_odds_chosen": 0.4868529438972473, "log_odds_ratio": -0.48662108182907104, "logits/chosen": -1.0361462831497192, "logits/rejected": -0.9489240646362305, "logps/chosen": -1.4281866550445557, "logps/rejected": -1.810755968093872, "loss": 2.7363, "nll_loss": 0.6354085206985474, "rewards/accuracies": 1.0, "rewards/chosen": -0.1428186595439911, "rewards/margins": 0.0382569283246994, "rewards/rejected": -0.1810755878686905, "step": 226 }, { "epoch": 0.5985497692814766, "grad_norm": 9.958191871643066, "learning_rate": 6.459627329192546e-06, "log_odds_chosen": 0.18820539116859436, "log_odds_ratio": -0.6056598424911499, "logits/chosen": -1.0441505908966064, "logits/rejected": -0.9917400479316711, "logps/chosen": -1.5679686069488525, "logps/rejected": -1.7161450386047363, "loss": 2.7614, "nll_loss": 0.6297923922538757, "rewards/accuracies": 0.875, "rewards/chosen": -0.1567968726158142, "rewards/margins": 0.014817635528743267, "rewards/rejected": -0.17161451280117035, "step": 227 }, { "epoch": 0.6011865524060646, "grad_norm": 10.128595352172852, "learning_rate": 6.452528837622005e-06, "log_odds_chosen": 0.2932848036289215, "log_odds_ratio": -0.5645895004272461, "logits/chosen": -0.9858654141426086, "logits/rejected": -0.9277141094207764, "logps/chosen": -1.513630747795105, "logps/rejected": -1.7531180381774902, "loss": 2.8007, "nll_loss": 0.6437171697616577, "rewards/accuracies": 0.875, "rewards/chosen": -0.1513630747795105, "rewards/margins": 0.02394874021410942, "rewards/rejected": -0.17531180381774902, "step": 228 }, { "epoch": 0.6038233355306526, "grad_norm": 9.5545015335083, "learning_rate": 6.445430346051464e-06, "log_odds_chosen": 0.6906859874725342, "log_odds_ratio": -0.42688441276550293, "logits/chosen": -0.949820876121521, "logits/rejected": -0.9070621728897095, "logps/chosen": -1.4062092304229736, "logps/rejected": -1.950735092163086, "loss": 1.7927, "nll_loss": 0.40549033880233765, "rewards/accuracies": 1.0, "rewards/chosen": -0.14062091708183289, "rewards/margins": 0.054452601820230484, "rewards/rejected": -0.19507351517677307, "step": 229 }, { "epoch": 0.6064601186552406, "grad_norm": 10.831774711608887, "learning_rate": 6.438331854480923e-06, "log_odds_chosen": 0.3960343301296234, "log_odds_ratio": -0.5205682516098022, "logits/chosen": -1.0768241882324219, "logits/rejected": -0.9421679973602295, "logps/chosen": -1.5416074991226196, "logps/rejected": -1.8671907186508179, "loss": 3.1623, "nll_loss": 0.7385120391845703, "rewards/accuracies": 1.0, "rewards/chosen": -0.1541607528924942, "rewards/margins": 0.032558321952819824, "rewards/rejected": -0.18671908974647522, "step": 230 }, { "epoch": 0.6090969017798286, "grad_norm": 9.034571647644043, "learning_rate": 6.431233362910381e-06, "log_odds_chosen": 0.564643383026123, "log_odds_ratio": -0.46083638072013855, "logits/chosen": -0.9658318758010864, "logits/rejected": -0.9441201090812683, "logps/chosen": -1.2823760509490967, "logps/rejected": -1.703157901763916, "loss": 1.6643, "nll_loss": 0.36999040842056274, "rewards/accuracies": 1.0, "rewards/chosen": -0.12823760509490967, "rewards/margins": 0.04207818582653999, "rewards/rejected": -0.17031580209732056, "step": 231 }, { "epoch": 0.6117336849044166, "grad_norm": 10.05081558227539, "learning_rate": 6.42413487133984e-06, "log_odds_chosen": 0.3165929913520813, "log_odds_ratio": -0.5530787110328674, "logits/chosen": -1.0751591920852661, "logits/rejected": -0.9716577529907227, "logps/chosen": -1.5636658668518066, "logps/rejected": -1.8268802165985107, "loss": 2.8134, "nll_loss": 0.6480363607406616, "rewards/accuracies": 1.0, "rewards/chosen": -0.15636658668518066, "rewards/margins": 0.026321424171328545, "rewards/rejected": -0.18268801271915436, "step": 232 }, { "epoch": 0.6143704680290046, "grad_norm": 10.55700969696045, "learning_rate": 6.417036379769299e-06, "log_odds_chosen": 0.21344926953315735, "log_odds_ratio": -0.6002848148345947, "logits/chosen": -1.0792782306671143, "logits/rejected": -1.0400300025939941, "logps/chosen": -1.662772297859192, "logps/rejected": -1.8279900550842285, "loss": 2.9567, "nll_loss": 0.6791369318962097, "rewards/accuracies": 0.875, "rewards/chosen": -0.1662772297859192, "rewards/margins": 0.01652177795767784, "rewards/rejected": -0.18279901146888733, "step": 233 }, { "epoch": 0.6170072511535926, "grad_norm": 9.214619636535645, "learning_rate": 6.409937888198757e-06, "log_odds_chosen": 0.22965389490127563, "log_odds_ratio": -0.6114335060119629, "logits/chosen": -0.9787470102310181, "logits/rejected": -0.9347630143165588, "logps/chosen": -1.5235848426818848, "logps/rejected": -1.7249484062194824, "loss": 2.1824, "nll_loss": 0.48445600271224976, "rewards/accuracies": 0.75, "rewards/chosen": -0.1523585021495819, "rewards/margins": 0.020136352628469467, "rewards/rejected": -0.17249484360218048, "step": 234 }, { "epoch": 0.6196440342781806, "grad_norm": 9.567245483398438, "learning_rate": 6.4028393966282165e-06, "log_odds_chosen": 0.2276856005191803, "log_odds_ratio": -0.5981299877166748, "logits/chosen": -0.9614957571029663, "logits/rejected": -0.8972662687301636, "logps/chosen": -1.5509299039840698, "logps/rejected": -1.7373332977294922, "loss": 2.3168, "nll_loss": 0.5193837285041809, "rewards/accuracies": 0.875, "rewards/chosen": -0.1550929844379425, "rewards/margins": 0.01864035427570343, "rewards/rejected": -0.17373332381248474, "step": 235 }, { "epoch": 0.6222808174027686, "grad_norm": 9.45780086517334, "learning_rate": 6.3957409050576745e-06, "log_odds_chosen": 0.5684501528739929, "log_odds_ratio": -0.45974200963974, "logits/chosen": -0.9882232546806335, "logits/rejected": -0.9146469831466675, "logps/chosen": -1.3238399028778076, "logps/rejected": -1.7685880661010742, "loss": 2.0164, "nll_loss": 0.4581286311149597, "rewards/accuracies": 1.0, "rewards/chosen": -0.13238400220870972, "rewards/margins": 0.044474828988313675, "rewards/rejected": -0.1768588274717331, "step": 236 }, { "epoch": 0.6249176005273567, "grad_norm": 9.68630599975586, "learning_rate": 6.388642413487134e-06, "log_odds_chosen": 0.2962833344936371, "log_odds_ratio": -0.563479483127594, "logits/chosen": -1.0296807289123535, "logits/rejected": -0.970088541507721, "logps/chosen": -1.3749408721923828, "logps/rejected": -1.5976872444152832, "loss": 2.4518, "nll_loss": 0.5565969944000244, "rewards/accuracies": 0.875, "rewards/chosen": -0.13749408721923828, "rewards/margins": 0.02227463759481907, "rewards/rejected": -0.1597687155008316, "step": 237 }, { "epoch": 0.6275543836519446, "grad_norm": 10.236848831176758, "learning_rate": 6.381543921916592e-06, "log_odds_chosen": 0.24890679121017456, "log_odds_ratio": -0.5857919454574585, "logits/chosen": -1.0486788749694824, "logits/rejected": -0.9479892253875732, "logps/chosen": -1.5664364099502563, "logps/rejected": -1.764575481414795, "loss": 2.699, "nll_loss": 0.6161803007125854, "rewards/accuracies": 0.875, "rewards/chosen": -0.15664362907409668, "rewards/margins": 0.019813913851976395, "rewards/rejected": -0.17645753920078278, "step": 238 }, { "epoch": 0.6301911667765326, "grad_norm": 9.747345924377441, "learning_rate": 6.374445430346051e-06, "log_odds_chosen": 0.22345906496047974, "log_odds_ratio": -0.5982892513275146, "logits/chosen": -1.0117814540863037, "logits/rejected": -0.9376793503761292, "logps/chosen": -1.4230129718780518, "logps/rejected": -1.586336612701416, "loss": 2.3352, "nll_loss": 0.5239756107330322, "rewards/accuracies": 0.75, "rewards/chosen": -0.1423013061285019, "rewards/margins": 0.01633235067129135, "rewards/rejected": -0.15863364934921265, "step": 239 }, { "epoch": 0.6328279499011207, "grad_norm": 9.946846008300781, "learning_rate": 6.36734693877551e-06, "log_odds_chosen": 0.48092615604400635, "log_odds_ratio": -0.5143439769744873, "logits/chosen": -1.0396180152893066, "logits/rejected": -0.9259074926376343, "logps/chosen": -1.461484670639038, "logps/rejected": -1.854004979133606, "loss": 2.3684, "nll_loss": 0.5406550765037537, "rewards/accuracies": 0.875, "rewards/chosen": -0.1461484730243683, "rewards/margins": 0.039252012968063354, "rewards/rejected": -0.18540048599243164, "step": 240 }, { "epoch": 0.6354647330257086, "grad_norm": 8.788450241088867, "learning_rate": 6.360248447204968e-06, "log_odds_chosen": 0.47860464453697205, "log_odds_ratio": -0.4993082582950592, "logits/chosen": -0.9827038645744324, "logits/rejected": -0.946448028087616, "logps/chosen": -1.3302018642425537, "logps/rejected": -1.6790567636489868, "loss": 1.8064, "nll_loss": 0.4016784429550171, "rewards/accuracies": 1.0, "rewards/chosen": -0.13302019238471985, "rewards/margins": 0.03488549590110779, "rewards/rejected": -0.16790568828582764, "step": 241 }, { "epoch": 0.6381015161502966, "grad_norm": 9.804183006286621, "learning_rate": 6.353149955634427e-06, "log_odds_chosen": 0.39079076051712036, "log_odds_ratio": -0.5262644290924072, "logits/chosen": -1.0029771327972412, "logits/rejected": -0.911673367023468, "logps/chosen": -1.5539920330047607, "logps/rejected": -1.8723018169403076, "loss": 2.3257, "nll_loss": 0.5287995338439941, "rewards/accuracies": 0.875, "rewards/chosen": -0.15539920330047607, "rewards/margins": 0.03183097392320633, "rewards/rejected": -0.187230184674263, "step": 242 }, { "epoch": 0.6407382992748847, "grad_norm": 9.535198211669922, "learning_rate": 6.346051464063886e-06, "log_odds_chosen": 0.37506628036499023, "log_odds_ratio": -0.5474951863288879, "logits/chosen": -1.0286223888397217, "logits/rejected": -0.9575079083442688, "logps/chosen": -1.509547472000122, "logps/rejected": -1.8135558366775513, "loss": 2.2811, "nll_loss": 0.5155328512191772, "rewards/accuracies": 0.75, "rewards/chosen": -0.15095475316047668, "rewards/margins": 0.030400821939110756, "rewards/rejected": -0.1813555806875229, "step": 243 }, { "epoch": 0.6433750823994726, "grad_norm": 9.990318298339844, "learning_rate": 6.338952972493345e-06, "log_odds_chosen": 0.4546193778514862, "log_odds_ratio": -0.5123645663261414, "logits/chosen": -0.9370576739311218, "logits/rejected": -0.9146692752838135, "logps/chosen": -1.3397631645202637, "logps/rejected": -1.688523530960083, "loss": 2.5396, "nll_loss": 0.5836517214775085, "rewards/accuracies": 0.875, "rewards/chosen": -0.1339763104915619, "rewards/margins": 0.03487604111433029, "rewards/rejected": -0.16885235905647278, "step": 244 }, { "epoch": 0.6460118655240606, "grad_norm": 9.547015190124512, "learning_rate": 6.331854480922804e-06, "log_odds_chosen": 0.1481492817401886, "log_odds_ratio": -0.6277358531951904, "logits/chosen": -1.0131888389587402, "logits/rejected": -0.9746987819671631, "logps/chosen": -1.396069884300232, "logps/rejected": -1.5164241790771484, "loss": 2.0986, "nll_loss": 0.46188369393348694, "rewards/accuracies": 0.75, "rewards/chosen": -0.13960698246955872, "rewards/margins": 0.012035440653562546, "rewards/rejected": -0.15164242684841156, "step": 245 }, { "epoch": 0.6486486486486487, "grad_norm": 10.266288757324219, "learning_rate": 6.324755989352263e-06, "log_odds_chosen": 0.3449682295322418, "log_odds_ratio": -0.5422677993774414, "logits/chosen": -0.9396032094955444, "logits/rejected": -0.8918734192848206, "logps/chosen": -1.4369215965270996, "logps/rejected": -1.6943345069885254, "loss": 2.6221, "nll_loss": 0.6013014912605286, "rewards/accuracies": 1.0, "rewards/chosen": -0.14369216561317444, "rewards/margins": 0.025741294026374817, "rewards/rejected": -0.16943347454071045, "step": 246 }, { "epoch": 0.6512854317732366, "grad_norm": 10.216219902038574, "learning_rate": 6.317657497781721e-06, "log_odds_chosen": 0.13044913113117218, "log_odds_ratio": -0.6382143497467041, "logits/chosen": -1.0274735689163208, "logits/rejected": -0.9672619104385376, "logps/chosen": -1.6018891334533691, "logps/rejected": -1.697109580039978, "loss": 2.5938, "nll_loss": 0.5846191644668579, "rewards/accuracies": 0.75, "rewards/chosen": -0.16018891334533691, "rewards/margins": 0.009522044099867344, "rewards/rejected": -0.16971096396446228, "step": 247 }, { "epoch": 0.6539222148978246, "grad_norm": 10.080612182617188, "learning_rate": 6.3105590062111805e-06, "log_odds_chosen": 0.3156033754348755, "log_odds_ratio": -0.5620311498641968, "logits/chosen": -0.9943605661392212, "logits/rejected": -0.913483202457428, "logps/chosen": -1.4455002546310425, "logps/rejected": -1.6898422241210938, "loss": 2.3417, "nll_loss": 0.5292194485664368, "rewards/accuracies": 0.75, "rewards/chosen": -0.14455002546310425, "rewards/margins": 0.024434205144643784, "rewards/rejected": -0.16898421943187714, "step": 248 }, { "epoch": 0.6565589980224127, "grad_norm": 10.621593475341797, "learning_rate": 6.3034605146406385e-06, "log_odds_chosen": 0.3722479045391083, "log_odds_ratio": -0.5260739326477051, "logits/chosen": -1.010783076286316, "logits/rejected": -0.9419607520103455, "logps/chosen": -1.393200159072876, "logps/rejected": -1.6805057525634766, "loss": 2.4488, "nll_loss": 0.5595893263816833, "rewards/accuracies": 1.0, "rewards/chosen": -0.1393200308084488, "rewards/margins": 0.02873056009411812, "rewards/rejected": -0.1680505871772766, "step": 249 }, { "epoch": 0.6591957811470006, "grad_norm": 10.194984436035156, "learning_rate": 6.296362023070097e-06, "log_odds_chosen": 0.2856564223766327, "log_odds_ratio": -0.5765359997749329, "logits/chosen": -1.040339708328247, "logits/rejected": -0.9896783828735352, "logps/chosen": -1.6168367862701416, "logps/rejected": -1.860325813293457, "loss": 2.5298, "nll_loss": 0.5747956037521362, "rewards/accuracies": 0.75, "rewards/chosen": -0.16168367862701416, "rewards/margins": 0.024348901584744453, "rewards/rejected": -0.18603257834911346, "step": 250 }, { "epoch": 0.6618325642715887, "grad_norm": 10.166160583496094, "learning_rate": 6.289263531499556e-06, "log_odds_chosen": 0.49128079414367676, "log_odds_ratio": -0.5052339434623718, "logits/chosen": -1.0190470218658447, "logits/rejected": -0.9464223384857178, "logps/chosen": -1.583850383758545, "logps/rejected": -1.9695919752120972, "loss": 3.0117, "nll_loss": 0.7024120092391968, "rewards/accuracies": 0.875, "rewards/chosen": -0.1583850383758545, "rewards/margins": 0.038574155420064926, "rewards/rejected": -0.1969592124223709, "step": 251 }, { "epoch": 0.6644693473961767, "grad_norm": 9.572004318237305, "learning_rate": 6.282165039929014e-06, "log_odds_chosen": 0.6220227479934692, "log_odds_ratio": -0.44183260202407837, "logits/chosen": -1.118480920791626, "logits/rejected": -0.9814411401748657, "logps/chosen": -1.4758163690567017, "logps/rejected": -1.9799898862838745, "loss": 2.901, "nll_loss": 0.6810585856437683, "rewards/accuracies": 1.0, "rewards/chosen": -0.14758163690567017, "rewards/margins": 0.05041735619306564, "rewards/rejected": -0.1979989856481552, "step": 252 }, { "epoch": 0.6671061305207646, "grad_norm": 10.045660972595215, "learning_rate": 6.275066548358473e-06, "log_odds_chosen": 0.2900097966194153, "log_odds_ratio": -0.5657051801681519, "logits/chosen": -1.0574668645858765, "logits/rejected": -0.9897823333740234, "logps/chosen": -1.668576955795288, "logps/rejected": -1.9087705612182617, "loss": 2.8323, "nll_loss": 0.6514928936958313, "rewards/accuracies": 0.875, "rewards/chosen": -0.16685768961906433, "rewards/margins": 0.024019362404942513, "rewards/rejected": -0.1908770501613617, "step": 253 }, { "epoch": 0.6697429136453527, "grad_norm": 9.784111976623535, "learning_rate": 6.267968056787932e-06, "log_odds_chosen": 0.21802666783332825, "log_odds_ratio": -0.5974329113960266, "logits/chosen": -1.0291823148727417, "logits/rejected": -0.9502480030059814, "logps/chosen": -1.4417308568954468, "logps/rejected": -1.6095776557922363, "loss": 2.3837, "nll_loss": 0.536192774772644, "rewards/accuracies": 1.0, "rewards/chosen": -0.14417308568954468, "rewards/margins": 0.016784675419330597, "rewards/rejected": -0.16095776855945587, "step": 254 }, { "epoch": 0.6723796967699407, "grad_norm": 10.003222465515137, "learning_rate": 6.260869565217391e-06, "log_odds_chosen": 0.3013702630996704, "log_odds_ratio": -0.5770978927612305, "logits/chosen": -0.9175786375999451, "logits/rejected": -0.8581453561782837, "logps/chosen": -1.6248061656951904, "logps/rejected": -1.8886432647705078, "loss": 2.7559, "nll_loss": 0.6312604546546936, "rewards/accuracies": 0.75, "rewards/chosen": -0.16248062252998352, "rewards/margins": 0.026383716613054276, "rewards/rejected": -0.1888643503189087, "step": 255 }, { "epoch": 0.6750164798945286, "grad_norm": 9.680693626403809, "learning_rate": 6.25377107364685e-06, "log_odds_chosen": 0.2826671898365021, "log_odds_ratio": -0.5702804327011108, "logits/chosen": -1.057939052581787, "logits/rejected": -0.9414132833480835, "logps/chosen": -1.4873517751693726, "logps/rejected": -1.7252017259597778, "loss": 2.5016, "nll_loss": 0.5683744549751282, "rewards/accuracies": 0.875, "rewards/chosen": -0.1487351655960083, "rewards/margins": 0.023785004392266273, "rewards/rejected": -0.17252019047737122, "step": 256 }, { "epoch": 0.6776532630191167, "grad_norm": 9.82450008392334, "learning_rate": 6.246672582076309e-06, "log_odds_chosen": 0.42896807193756104, "log_odds_ratio": -0.5112147927284241, "logits/chosen": -1.066590428352356, "logits/rejected": -0.9673632383346558, "logps/chosen": -1.5336809158325195, "logps/rejected": -1.882852554321289, "loss": 2.7702, "nll_loss": 0.641433835029602, "rewards/accuracies": 0.875, "rewards/chosen": -0.15336810052394867, "rewards/margins": 0.03491714596748352, "rewards/rejected": -0.1882852464914322, "step": 257 }, { "epoch": 0.6802900461437047, "grad_norm": 9.927961349487305, "learning_rate": 6.239574090505767e-06, "log_odds_chosen": 0.38350343704223633, "log_odds_ratio": -0.5303336381912231, "logits/chosen": -1.0436394214630127, "logits/rejected": -0.9509331583976746, "logps/chosen": -1.5962193012237549, "logps/rejected": -1.9133834838867188, "loss": 2.694, "nll_loss": 0.6204739212989807, "rewards/accuracies": 0.875, "rewards/chosen": -0.159621924161911, "rewards/margins": 0.03171642869710922, "rewards/rejected": -0.19133836030960083, "step": 258 }, { "epoch": 0.6829268292682927, "grad_norm": 10.485347747802734, "learning_rate": 6.232475598935226e-06, "log_odds_chosen": 0.1634972095489502, "log_odds_ratio": -0.6173048615455627, "logits/chosen": -1.057889699935913, "logits/rejected": -1.0043100118637085, "logps/chosen": -1.6032731533050537, "logps/rejected": -1.7344703674316406, "loss": 3.0032, "nll_loss": 0.6890586614608765, "rewards/accuracies": 0.875, "rewards/chosen": -0.16032731533050537, "rewards/margins": 0.01311972551047802, "rewards/rejected": -0.17344704270362854, "step": 259 }, { "epoch": 0.6855636123928807, "grad_norm": 9.942307472229004, "learning_rate": 6.225377107364685e-06, "log_odds_chosen": 0.45469415187835693, "log_odds_ratio": -0.4982328414916992, "logits/chosen": -1.0273206233978271, "logits/rejected": -0.9484516978263855, "logps/chosen": -1.450758934020996, "logps/rejected": -1.815749168395996, "loss": 2.3167, "nll_loss": 0.5293515920639038, "rewards/accuracies": 1.0, "rewards/chosen": -0.14507588744163513, "rewards/margins": 0.0364990308880806, "rewards/rejected": -0.18157494068145752, "step": 260 }, { "epoch": 0.6882003955174687, "grad_norm": 10.553006172180176, "learning_rate": 6.218278615794143e-06, "log_odds_chosen": 0.3766656517982483, "log_odds_ratio": -0.5311521887779236, "logits/chosen": -1.0044142007827759, "logits/rejected": -0.9288753867149353, "logps/chosen": -1.6029090881347656, "logps/rejected": -1.9176501035690308, "loss": 2.6651, "nll_loss": 0.6131627559661865, "rewards/accuracies": 1.0, "rewards/chosen": -0.1602909117937088, "rewards/margins": 0.03147410601377487, "rewards/rejected": -0.19176501035690308, "step": 261 }, { "epoch": 0.6908371786420567, "grad_norm": 10.289958000183105, "learning_rate": 6.2111801242236025e-06, "log_odds_chosen": 0.37050265073776245, "log_odds_ratio": -0.5371365547180176, "logits/chosen": -1.044307827949524, "logits/rejected": -0.9999052286148071, "logps/chosen": -1.5295069217681885, "logps/rejected": -1.8322433233261108, "loss": 2.8767, "nll_loss": 0.6654726266860962, "rewards/accuracies": 0.875, "rewards/chosen": -0.1529507040977478, "rewards/margins": 0.030273636803030968, "rewards/rejected": -0.18322433531284332, "step": 262 }, { "epoch": 0.6934739617666447, "grad_norm": 10.079249382019043, "learning_rate": 6.204081632653061e-06, "log_odds_chosen": 0.35678648948669434, "log_odds_ratio": -0.5365309715270996, "logits/chosen": -1.0440478324890137, "logits/rejected": -0.9332611560821533, "logps/chosen": -1.5165398120880127, "logps/rejected": -1.7985224723815918, "loss": 2.8466, "nll_loss": 0.658006489276886, "rewards/accuracies": 1.0, "rewards/chosen": -0.1516539752483368, "rewards/margins": 0.028198271989822388, "rewards/rejected": -0.17985224723815918, "step": 263 }, { "epoch": 0.6961107448912327, "grad_norm": 10.370190620422363, "learning_rate": 6.19698314108252e-06, "log_odds_chosen": 0.31438887119293213, "log_odds_ratio": -0.5823736786842346, "logits/chosen": -1.0256203413009644, "logits/rejected": -0.9438142776489258, "logps/chosen": -1.4517719745635986, "logps/rejected": -1.6985116004943848, "loss": 2.22, "nll_loss": 0.49675092101097107, "rewards/accuracies": 0.875, "rewards/chosen": -0.1451771855354309, "rewards/margins": 0.02467396855354309, "rewards/rejected": -0.169851154088974, "step": 264 }, { "epoch": 0.6987475280158207, "grad_norm": 10.093335151672363, "learning_rate": 6.189884649511978e-06, "log_odds_chosen": 0.5108283758163452, "log_odds_ratio": -0.48044654726982117, "logits/chosen": -1.038010597229004, "logits/rejected": -0.9140152931213379, "logps/chosen": -1.3935471773147583, "logps/rejected": -1.7870471477508545, "loss": 2.311, "nll_loss": 0.5297090411186218, "rewards/accuracies": 1.0, "rewards/chosen": -0.13935470581054688, "rewards/margins": 0.0393500030040741, "rewards/rejected": -0.17870470881462097, "step": 265 }, { "epoch": 0.7013843111404087, "grad_norm": 9.891541481018066, "learning_rate": 6.182786157941437e-06, "log_odds_chosen": 0.33088430762290955, "log_odds_ratio": -0.5532064437866211, "logits/chosen": -1.065554141998291, "logits/rejected": -1.032827377319336, "logps/chosen": -1.3114806413650513, "logps/rejected": -1.5359127521514893, "loss": 2.3022, "nll_loss": 0.520240306854248, "rewards/accuracies": 0.875, "rewards/chosen": -0.1311480700969696, "rewards/margins": 0.022443203255534172, "rewards/rejected": -0.15359127521514893, "step": 266 }, { "epoch": 0.7040210942649967, "grad_norm": 9.849285125732422, "learning_rate": 6.175687666370896e-06, "log_odds_chosen": 0.542593240737915, "log_odds_ratio": -0.4701288342475891, "logits/chosen": -1.037049412727356, "logits/rejected": -0.9226208925247192, "logps/chosen": -1.423630714416504, "logps/rejected": -1.8576700687408447, "loss": 2.2385, "nll_loss": 0.5126224160194397, "rewards/accuracies": 1.0, "rewards/chosen": -0.1423630714416504, "rewards/margins": 0.043403930962085724, "rewards/rejected": -0.1857670247554779, "step": 267 }, { "epoch": 0.7066578773895847, "grad_norm": 10.11159610748291, "learning_rate": 6.168589174800354e-06, "log_odds_chosen": 0.3737114667892456, "log_odds_ratio": -0.5347133278846741, "logits/chosen": -1.0566768646240234, "logits/rejected": -0.9615504741668701, "logps/chosen": -1.5327125787734985, "logps/rejected": -1.8281084299087524, "loss": 2.7147, "nll_loss": 0.6252114772796631, "rewards/accuracies": 0.875, "rewards/chosen": -0.15327127277851105, "rewards/margins": 0.029539581388235092, "rewards/rejected": -0.18281084299087524, "step": 268 }, { "epoch": 0.7092946605141727, "grad_norm": 9.563637733459473, "learning_rate": 6.161490683229813e-06, "log_odds_chosen": 0.2552526295185089, "log_odds_ratio": -0.5950664281845093, "logits/chosen": -1.021816611289978, "logits/rejected": -0.9392303824424744, "logps/chosen": -1.48829984664917, "logps/rejected": -1.7040952444076538, "loss": 2.3807, "nll_loss": 0.5356656908988953, "rewards/accuracies": 0.75, "rewards/chosen": -0.14882998168468475, "rewards/margins": 0.021579559892416, "rewards/rejected": -0.17040953040122986, "step": 269 }, { "epoch": 0.7119314436387607, "grad_norm": 9.498360633850098, "learning_rate": 6.154392191659272e-06, "log_odds_chosen": 0.41562414169311523, "log_odds_ratio": -0.5205641984939575, "logits/chosen": -0.9885743856430054, "logits/rejected": -0.9419618844985962, "logps/chosen": -1.4006032943725586, "logps/rejected": -1.7179901599884033, "loss": 1.8683, "nll_loss": 0.4150174856185913, "rewards/accuracies": 1.0, "rewards/chosen": -0.14006033539772034, "rewards/margins": 0.031738679856061935, "rewards/rejected": -0.17179900407791138, "step": 270 }, { "epoch": 0.7145682267633487, "grad_norm": 9.569456100463867, "learning_rate": 6.147293700088731e-06, "log_odds_chosen": 0.48436087369918823, "log_odds_ratio": -0.49208369851112366, "logits/chosen": -1.0386489629745483, "logits/rejected": -0.9447503089904785, "logps/chosen": -1.457115650177002, "logps/rejected": -1.8508636951446533, "loss": 2.3597, "nll_loss": 0.5407109260559082, "rewards/accuracies": 0.875, "rewards/chosen": -0.14571155607700348, "rewards/margins": 0.03937479853630066, "rewards/rejected": -0.18508636951446533, "step": 271 }, { "epoch": 0.7172050098879367, "grad_norm": 11.085938453674316, "learning_rate": 6.140195208518189e-06, "log_odds_chosen": 0.44340041279792786, "log_odds_ratio": -0.5080432891845703, "logits/chosen": -1.1370983123779297, "logits/rejected": -1.0340384244918823, "logps/chosen": -1.6304019689559937, "logps/rejected": -1.9962579011917114, "loss": 3.1557, "nll_loss": 0.7381245493888855, "rewards/accuracies": 0.875, "rewards/chosen": -0.1630401909351349, "rewards/margins": 0.036585595458745956, "rewards/rejected": -0.19962579011917114, "step": 272 }, { "epoch": 0.7198417930125247, "grad_norm": 10.364092826843262, "learning_rate": 6.133096716947649e-06, "log_odds_chosen": 0.23562709987163544, "log_odds_ratio": -0.5894039869308472, "logits/chosen": -1.0973641872406006, "logits/rejected": -1.036959171295166, "logps/chosen": -1.6053028106689453, "logps/rejected": -1.7993088960647583, "loss": 2.8391, "nll_loss": 0.650845468044281, "rewards/accuracies": 0.875, "rewards/chosen": -0.16053026914596558, "rewards/margins": 0.019400635734200478, "rewards/rejected": -0.1799308955669403, "step": 273 }, { "epoch": 0.7224785761371127, "grad_norm": 9.403023719787598, "learning_rate": 6.125998225377107e-06, "log_odds_chosen": 0.5421465635299683, "log_odds_ratio": -0.47444844245910645, "logits/chosen": -1.0056291818618774, "logits/rejected": -0.9600179195404053, "logps/chosen": -1.420364260673523, "logps/rejected": -1.8501088619232178, "loss": 2.0598, "nll_loss": 0.467495322227478, "rewards/accuracies": 1.0, "rewards/chosen": -0.14203643798828125, "rewards/margins": 0.04297446459531784, "rewards/rejected": -0.1850108951330185, "step": 274 }, { "epoch": 0.7251153592617007, "grad_norm": 10.056530952453613, "learning_rate": 6.1188997338065665e-06, "log_odds_chosen": 0.6246941089630127, "log_odds_ratio": -0.47945964336395264, "logits/chosen": -1.0567095279693604, "logits/rejected": -0.9212784767150879, "logps/chosen": -1.582176685333252, "logps/rejected": -2.1189141273498535, "loss": 2.7229, "nll_loss": 0.6327791213989258, "rewards/accuracies": 0.875, "rewards/chosen": -0.1582176685333252, "rewards/margins": 0.05367375165224075, "rewards/rejected": -0.21189141273498535, "step": 275 }, { "epoch": 0.7277521423862887, "grad_norm": 10.36487102508545, "learning_rate": 6.111801242236025e-06, "log_odds_chosen": 0.3678748905658722, "log_odds_ratio": -0.5407025814056396, "logits/chosen": -1.0414223670959473, "logits/rejected": -0.9859704375267029, "logps/chosen": -1.5252094268798828, "logps/rejected": -1.8135719299316406, "loss": 2.6263, "nll_loss": 0.602510929107666, "rewards/accuracies": 0.75, "rewards/chosen": -0.15252093970775604, "rewards/margins": 0.028836257755756378, "rewards/rejected": -0.18135720491409302, "step": 276 }, { "epoch": 0.7303889255108768, "grad_norm": 9.549089431762695, "learning_rate": 6.104702750665483e-06, "log_odds_chosen": 0.22213289141654968, "log_odds_ratio": -0.5968341827392578, "logits/chosen": -1.0602388381958008, "logits/rejected": -0.9919387698173523, "logps/chosen": -1.4902098178863525, "logps/rejected": -1.6675565242767334, "loss": 2.3944, "nll_loss": 0.5389155745506287, "rewards/accuracies": 0.75, "rewards/chosen": -0.1490209847688675, "rewards/margins": 0.017734672874212265, "rewards/rejected": -0.16675564646720886, "step": 277 }, { "epoch": 0.7330257086354647, "grad_norm": 9.606721878051758, "learning_rate": 6.097604259094942e-06, "log_odds_chosen": 0.2623395323753357, "log_odds_ratio": -0.5808668732643127, "logits/chosen": -1.0555493831634521, "logits/rejected": -0.9892610907554626, "logps/chosen": -1.4999754428863525, "logps/rejected": -1.7185721397399902, "loss": 2.4409, "nll_loss": 0.5521459579467773, "rewards/accuracies": 0.875, "rewards/chosen": -0.1499975323677063, "rewards/margins": 0.02185966446995735, "rewards/rejected": -0.17185720801353455, "step": 278 }, { "epoch": 0.7356624917600527, "grad_norm": 10.382844924926758, "learning_rate": 6.0905057675244005e-06, "log_odds_chosen": 0.2454441487789154, "log_odds_ratio": -0.5859407186508179, "logits/chosen": -0.9638535976409912, "logits/rejected": -0.9263155460357666, "logps/chosen": -1.5392746925354004, "logps/rejected": -1.7312613725662231, "loss": 2.809, "nll_loss": 0.6436530947685242, "rewards/accuracies": 0.875, "rewards/chosen": -0.15392747521400452, "rewards/margins": 0.019198667258024216, "rewards/rejected": -0.17312613129615784, "step": 279 }, { "epoch": 0.7382992748846408, "grad_norm": 9.472749710083008, "learning_rate": 6.083407275953859e-06, "log_odds_chosen": 0.05665683373808861, "log_odds_ratio": -0.670945405960083, "logits/chosen": -1.0366100072860718, "logits/rejected": -0.9746451377868652, "logps/chosen": -1.4893945455551147, "logps/rejected": -1.5296626091003418, "loss": 2.1057, "nll_loss": 0.4593251943588257, "rewards/accuracies": 0.5, "rewards/chosen": -0.14893946051597595, "rewards/margins": 0.004026808775961399, "rewards/rejected": -0.15296626091003418, "step": 280 }, { "epoch": 0.7409360580092288, "grad_norm": 9.456088066101074, "learning_rate": 6.076308784383318e-06, "log_odds_chosen": 0.27157944440841675, "log_odds_ratio": -0.5769294500350952, "logits/chosen": -0.9719491600990295, "logits/rejected": -0.9362573623657227, "logps/chosen": -1.4628607034683228, "logps/rejected": -1.685976266860962, "loss": 1.9465, "nll_loss": 0.4289361238479614, "rewards/accuracies": 0.875, "rewards/chosen": -0.14628607034683228, "rewards/margins": 0.02231154590845108, "rewards/rejected": -0.16859760880470276, "step": 281 }, { "epoch": 0.7435728411338167, "grad_norm": 10.356871604919434, "learning_rate": 6.069210292812777e-06, "log_odds_chosen": 0.4430519938468933, "log_odds_ratio": -0.5062916278839111, "logits/chosen": -1.0771749019622803, "logits/rejected": -0.9861480593681335, "logps/chosen": -1.5304234027862549, "logps/rejected": -1.8878121376037598, "loss": 2.872, "nll_loss": 0.6673677563667297, "rewards/accuracies": 1.0, "rewards/chosen": -0.15304234623908997, "rewards/margins": 0.03573887050151825, "rewards/rejected": -0.18878121674060822, "step": 282 }, { "epoch": 0.7462096242584048, "grad_norm": 10.179832458496094, "learning_rate": 6.062111801242236e-06, "log_odds_chosen": 0.11679290235042572, "log_odds_ratio": -0.6467869281768799, "logits/chosen": -0.9838048219680786, "logits/rejected": -0.9357846975326538, "logps/chosen": -1.581260323524475, "logps/rejected": -1.6788735389709473, "loss": 2.7507, "nll_loss": 0.6229973435401917, "rewards/accuracies": 0.75, "rewards/chosen": -0.15812602639198303, "rewards/margins": 0.009761332534253597, "rewards/rejected": -0.1678873598575592, "step": 283 }, { "epoch": 0.7488464073829928, "grad_norm": 9.709866523742676, "learning_rate": 6.055013309671695e-06, "log_odds_chosen": 0.1251356452703476, "log_odds_ratio": -0.6371869444847107, "logits/chosen": -0.9856249094009399, "logits/rejected": -0.9588727951049805, "logps/chosen": -1.3696320056915283, "logps/rejected": -1.4571166038513184, "loss": 2.046, "nll_loss": 0.4477924108505249, "rewards/accuracies": 0.625, "rewards/chosen": -0.13696320354938507, "rewards/margins": 0.008748448453843594, "rewards/rejected": -0.14571166038513184, "step": 284 }, { "epoch": 0.7514831905075807, "grad_norm": 9.576770782470703, "learning_rate": 6.047914818101153e-06, "log_odds_chosen": 0.8595589399337769, "log_odds_ratio": -0.4332429766654968, "logits/chosen": -0.989209771156311, "logits/rejected": -0.9225939512252808, "logps/chosen": -1.6342555284500122, "logps/rejected": -2.388579845428467, "loss": 2.1112, "nll_loss": 0.4844658672809601, "rewards/accuracies": 0.875, "rewards/chosen": -0.16342556476593018, "rewards/margins": 0.0754324272274971, "rewards/rejected": -0.23885798454284668, "step": 285 }, { "epoch": 0.7541199736321688, "grad_norm": 9.967181205749512, "learning_rate": 6.040816326530612e-06, "log_odds_chosen": 0.5282140374183655, "log_odds_ratio": -0.4664245843887329, "logits/chosen": -1.0063544511795044, "logits/rejected": -0.9135369658470154, "logps/chosen": -1.426211953163147, "logps/rejected": -1.8436334133148193, "loss": 2.6222, "nll_loss": 0.6088989973068237, "rewards/accuracies": 1.0, "rewards/chosen": -0.14262118935585022, "rewards/margins": 0.04174215719103813, "rewards/rejected": -0.18436335027217865, "step": 286 }, { "epoch": 0.7567567567567568, "grad_norm": 10.017861366271973, "learning_rate": 6.033717834960071e-06, "log_odds_chosen": 0.377551794052124, "log_odds_ratio": -0.5416433215141296, "logits/chosen": -1.077212929725647, "logits/rejected": -0.9847630262374878, "logps/chosen": -1.4043962955474854, "logps/rejected": -1.7077497243881226, "loss": 2.6262, "nll_loss": 0.6023932695388794, "rewards/accuracies": 0.875, "rewards/chosen": -0.14043962955474854, "rewards/margins": 0.030335336923599243, "rewards/rejected": -0.17077496647834778, "step": 287 }, { "epoch": 0.7593935398813447, "grad_norm": 10.703896522521973, "learning_rate": 6.026619343389529e-06, "log_odds_chosen": 0.5014585256576538, "log_odds_ratio": -0.47988784313201904, "logits/chosen": -1.066920280456543, "logits/rejected": -0.9490258097648621, "logps/chosen": -1.6449315547943115, "logps/rejected": -2.0663325786590576, "loss": 2.751, "nll_loss": 0.6397608518600464, "rewards/accuracies": 1.0, "rewards/chosen": -0.16449317336082458, "rewards/margins": 0.04214010387659073, "rewards/rejected": -0.20663326978683472, "step": 288 }, { "epoch": 0.7620303230059328, "grad_norm": 11.897303581237793, "learning_rate": 6.019520851818989e-06, "log_odds_chosen": 0.2389475405216217, "log_odds_ratio": -0.5903327465057373, "logits/chosen": -1.0451998710632324, "logits/rejected": -0.9830878973007202, "logps/chosen": -2.0414822101593018, "logps/rejected": -2.2409515380859375, "loss": 4.2908, "nll_loss": 1.013670563697815, "rewards/accuracies": 0.75, "rewards/chosen": -0.20414823293685913, "rewards/margins": 0.01994692161679268, "rewards/rejected": -0.2240951508283615, "step": 289 }, { "epoch": 0.7646671061305208, "grad_norm": 9.770140647888184, "learning_rate": 6.012422360248447e-06, "log_odds_chosen": 0.44657570123672485, "log_odds_ratio": -0.5040766000747681, "logits/chosen": -1.0088856220245361, "logits/rejected": -0.9383779168128967, "logps/chosen": -1.5648479461669922, "logps/rejected": -1.9286675453186035, "loss": 2.4701, "nll_loss": 0.5671277642250061, "rewards/accuracies": 0.875, "rewards/chosen": -0.15648479759693146, "rewards/margins": 0.03638195991516113, "rewards/rejected": -0.19286677241325378, "step": 290 }, { "epoch": 0.7673038892551087, "grad_norm": 10.833564758300781, "learning_rate": 6.0053238686779056e-06, "log_odds_chosen": 0.35445818305015564, "log_odds_ratio": -0.540325939655304, "logits/chosen": -1.0003407001495361, "logits/rejected": -0.9062169790267944, "logps/chosen": -1.5486013889312744, "logps/rejected": -1.8227717876434326, "loss": 2.9061, "nll_loss": 0.6724933981895447, "rewards/accuracies": 0.875, "rewards/chosen": -0.15486015379428864, "rewards/margins": 0.02741703763604164, "rewards/rejected": -0.18227717280387878, "step": 291 }, { "epoch": 0.7699406723796968, "grad_norm": 10.445130348205566, "learning_rate": 5.9982253771073645e-06, "log_odds_chosen": 0.24582642316818237, "log_odds_ratio": -0.5868759155273438, "logits/chosen": -1.0471625328063965, "logits/rejected": -0.9607016444206238, "logps/chosen": -1.5718451738357544, "logps/rejected": -1.7703518867492676, "loss": 2.7326, "nll_loss": 0.6244602203369141, "rewards/accuracies": 0.75, "rewards/chosen": -0.15718451142311096, "rewards/margins": 0.019850673153996468, "rewards/rejected": -0.17703518271446228, "step": 292 }, { "epoch": 0.7725774555042848, "grad_norm": 9.336409568786621, "learning_rate": 5.991126885536823e-06, "log_odds_chosen": 0.28063106536865234, "log_odds_ratio": -0.5680664777755737, "logits/chosen": -1.0261191129684448, "logits/rejected": -0.96542888879776, "logps/chosen": -1.2532575130462646, "logps/rejected": -1.4647881984710693, "loss": 1.8778, "nll_loss": 0.4126465320587158, "rewards/accuracies": 0.875, "rewards/chosen": -0.1253257393836975, "rewards/margins": 0.021153081208467484, "rewards/rejected": -0.1464788317680359, "step": 293 }, { "epoch": 0.7752142386288727, "grad_norm": 10.19467830657959, "learning_rate": 5.984028393966282e-06, "log_odds_chosen": 0.3982515335083008, "log_odds_ratio": -0.5293940305709839, "logits/chosen": -1.054459810256958, "logits/rejected": -1.005912184715271, "logps/chosen": -1.3878642320632935, "logps/rejected": -1.7107563018798828, "loss": 2.2757, "nll_loss": 0.5159916877746582, "rewards/accuracies": 1.0, "rewards/chosen": -0.1387864202260971, "rewards/margins": 0.032289210706949234, "rewards/rejected": -0.17107564210891724, "step": 294 }, { "epoch": 0.7778510217534608, "grad_norm": 10.44420051574707, "learning_rate": 5.97692990239574e-06, "log_odds_chosen": 0.37824469804763794, "log_odds_ratio": -0.5246709585189819, "logits/chosen": -1.0574809312820435, "logits/rejected": -0.9439191222190857, "logps/chosen": -1.517193078994751, "logps/rejected": -1.8170679807662964, "loss": 2.7871, "nll_loss": 0.6443036198616028, "rewards/accuracies": 1.0, "rewards/chosen": -0.15171930193901062, "rewards/margins": 0.029987502843141556, "rewards/rejected": -0.18170681595802307, "step": 295 }, { "epoch": 0.7804878048780488, "grad_norm": 9.556363105773926, "learning_rate": 5.969831410825199e-06, "log_odds_chosen": 0.5033254623413086, "log_odds_ratio": -0.4875450134277344, "logits/chosen": -1.0689876079559326, "logits/rejected": -0.9918434619903564, "logps/chosen": -1.4760112762451172, "logps/rejected": -1.8789564371109009, "loss": 2.5634, "nll_loss": 0.592098593711853, "rewards/accuracies": 0.875, "rewards/chosen": -0.14760112762451172, "rewards/margins": 0.040294528007507324, "rewards/rejected": -0.18789565563201904, "step": 296 }, { "epoch": 0.7831245880026367, "grad_norm": 10.892327308654785, "learning_rate": 5.962732919254658e-06, "log_odds_chosen": 0.5084447860717773, "log_odds_ratio": -0.4953627288341522, "logits/chosen": -0.9680205583572388, "logits/rejected": -0.8744199872016907, "logps/chosen": -1.580714464187622, "logps/rejected": -2.009085178375244, "loss": 3.0477, "nll_loss": 0.712380051612854, "rewards/accuracies": 0.875, "rewards/chosen": -0.15807145833969116, "rewards/margins": 0.04283707216382027, "rewards/rejected": -0.20090851187705994, "step": 297 }, { "epoch": 0.7857613711272248, "grad_norm": 10.603328704833984, "learning_rate": 5.955634427684117e-06, "log_odds_chosen": 0.3260452449321747, "log_odds_ratio": -0.5495268106460571, "logits/chosen": -1.048534870147705, "logits/rejected": -1.0102920532226562, "logps/chosen": -1.6070526838302612, "logps/rejected": -1.8762900829315186, "loss": 2.8359, "nll_loss": 0.6540123224258423, "rewards/accuracies": 1.0, "rewards/chosen": -0.16070528328418732, "rewards/margins": 0.02692374214529991, "rewards/rejected": -0.18762901425361633, "step": 298 }, { "epoch": 0.7883981542518128, "grad_norm": 9.625335693359375, "learning_rate": 5.948535936113575e-06, "log_odds_chosen": 0.4078028202056885, "log_odds_ratio": -0.5223979949951172, "logits/chosen": -1.0432285070419312, "logits/rejected": -0.966896653175354, "logps/chosen": -1.4103808403015137, "logps/rejected": -1.7310397624969482, "loss": 2.3302, "nll_loss": 0.5303081274032593, "rewards/accuracies": 0.875, "rewards/chosen": -0.14103807508945465, "rewards/margins": 0.03206588327884674, "rewards/rejected": -0.17310397326946259, "step": 299 }, { "epoch": 0.7910349373764007, "grad_norm": 10.106436729431152, "learning_rate": 5.941437444543035e-06, "log_odds_chosen": 0.5095257759094238, "log_odds_ratio": -0.48302483558654785, "logits/chosen": -1.0242811441421509, "logits/rejected": -0.9563637971878052, "logps/chosen": -1.5495760440826416, "logps/rejected": -1.979056477546692, "loss": 2.345, "nll_loss": 0.5379441976547241, "rewards/accuracies": 1.0, "rewards/chosen": -0.1549576073884964, "rewards/margins": 0.042948052287101746, "rewards/rejected": -0.19790564477443695, "step": 300 }, { "epoch": 0.7936717205009888, "grad_norm": 11.148320198059082, "learning_rate": 5.934338952972493e-06, "log_odds_chosen": 0.3150947093963623, "log_odds_ratio": -0.5589739084243774, "logits/chosen": -1.0287885665893555, "logits/rejected": -0.9546213746070862, "logps/chosen": -1.4842816591262817, "logps/rejected": -1.726853370666504, "loss": 3.0095, "nll_loss": 0.6964690685272217, "rewards/accuracies": 0.875, "rewards/chosen": -0.14842815697193146, "rewards/margins": 0.02425718866288662, "rewards/rejected": -0.17268535494804382, "step": 301 }, { "epoch": 0.7963085036255768, "grad_norm": 9.838821411132812, "learning_rate": 5.927240461401953e-06, "log_odds_chosen": 0.5734976530075073, "log_odds_ratio": -0.45255494117736816, "logits/chosen": -1.057968020439148, "logits/rejected": -0.9363830089569092, "logps/chosen": -1.5468251705169678, "logps/rejected": -2.021021604537964, "loss": 2.7558, "nll_loss": 0.6436825394630432, "rewards/accuracies": 1.0, "rewards/chosen": -0.15468251705169678, "rewards/margins": 0.047419652342796326, "rewards/rejected": -0.2021021544933319, "step": 302 }, { "epoch": 0.7989452867501649, "grad_norm": 9.92994213104248, "learning_rate": 5.920141969831411e-06, "log_odds_chosen": 0.4116380512714386, "log_odds_ratio": -0.5225210189819336, "logits/chosen": -1.0409326553344727, "logits/rejected": -0.9100532531738281, "logps/chosen": -1.4202492237091064, "logps/rejected": -1.7517368793487549, "loss": 2.1704, "nll_loss": 0.4903418719768524, "rewards/accuracies": 0.875, "rewards/chosen": -0.1420249193906784, "rewards/margins": 0.03314877673983574, "rewards/rejected": -0.17517369985580444, "step": 303 }, { "epoch": 0.8015820698747528, "grad_norm": 10.500617980957031, "learning_rate": 5.913043478260869e-06, "log_odds_chosen": 0.3255411386489868, "log_odds_ratio": -0.5523953437805176, "logits/chosen": -0.9314634799957275, "logits/rejected": -0.8714169263839722, "logps/chosen": -1.528282880783081, "logps/rejected": -1.787902593612671, "loss": 2.8426, "nll_loss": 0.6554076075553894, "rewards/accuracies": 0.75, "rewards/chosen": -0.15282830595970154, "rewards/margins": 0.025961963459849358, "rewards/rejected": -0.17879024147987366, "step": 304 }, { "epoch": 0.8042188529993408, "grad_norm": 10.315369606018066, "learning_rate": 5.9059449866903285e-06, "log_odds_chosen": 0.4616833031177521, "log_odds_ratio": -0.5284723043441772, "logits/chosen": -1.0559440851211548, "logits/rejected": -0.9789779186248779, "logps/chosen": -1.544727087020874, "logps/rejected": -1.9428236484527588, "loss": 2.5563, "nll_loss": 0.5862153768539429, "rewards/accuracies": 0.875, "rewards/chosen": -0.1544727236032486, "rewards/margins": 0.03980964422225952, "rewards/rejected": -0.19428236782550812, "step": 305 }, { "epoch": 0.8068556361239289, "grad_norm": 10.912592887878418, "learning_rate": 5.8988464951197865e-06, "log_odds_chosen": 0.4379751682281494, "log_odds_ratio": -0.5078282356262207, "logits/chosen": -1.045093297958374, "logits/rejected": -0.9739735722541809, "logps/chosen": -1.608254313468933, "logps/rejected": -1.9695265293121338, "loss": 2.7347, "nll_loss": 0.6328895092010498, "rewards/accuracies": 1.0, "rewards/chosen": -0.1608254462480545, "rewards/margins": 0.03612722083926201, "rewards/rejected": -0.1969526708126068, "step": 306 }, { "epoch": 0.8094924192485168, "grad_norm": 10.669661521911621, "learning_rate": 5.891748003549245e-06, "log_odds_chosen": 0.21525943279266357, "log_odds_ratio": -0.5981292724609375, "logits/chosen": -1.049196720123291, "logits/rejected": -1.0034172534942627, "logps/chosen": -1.5248042345046997, "logps/rejected": -1.6964774131774902, "loss": 2.9062, "nll_loss": 0.6667332053184509, "rewards/accuracies": 0.75, "rewards/chosen": -0.15248043835163116, "rewards/margins": 0.017167314887046814, "rewards/rejected": -0.16964775323867798, "step": 307 }, { "epoch": 0.8121292023731048, "grad_norm": 9.655624389648438, "learning_rate": 5.884649511978704e-06, "log_odds_chosen": 0.4819903075695038, "log_odds_ratio": -0.4996598958969116, "logits/chosen": -0.9081611633300781, "logits/rejected": -0.8642649054527283, "logps/chosen": -1.4164258241653442, "logps/rejected": -1.7920730113983154, "loss": 2.2922, "nll_loss": 0.5230814218521118, "rewards/accuracies": 0.75, "rewards/chosen": -0.14164258539676666, "rewards/margins": 0.0375647246837616, "rewards/rejected": -0.17920729517936707, "step": 308 }, { "epoch": 0.8147659854976929, "grad_norm": 9.362163543701172, "learning_rate": 5.877551020408163e-06, "log_odds_chosen": 0.2944612205028534, "log_odds_ratio": -0.5692933797836304, "logits/chosen": -1.01718008518219, "logits/rejected": -0.9646862149238586, "logps/chosen": -1.3186593055725098, "logps/rejected": -1.5281615257263184, "loss": 2.1738, "nll_loss": 0.48651689291000366, "rewards/accuracies": 0.875, "rewards/chosen": -0.1318659484386444, "rewards/margins": 0.02095022052526474, "rewards/rejected": -0.15281614661216736, "step": 309 }, { "epoch": 0.8174027686222808, "grad_norm": 9.855142593383789, "learning_rate": 5.870452528837621e-06, "log_odds_chosen": 0.3722895383834839, "log_odds_ratio": -0.5283873081207275, "logits/chosen": -1.020612120628357, "logits/rejected": -0.9500452280044556, "logps/chosen": -1.4737542867660522, "logps/rejected": -1.772289752960205, "loss": 2.3783, "nll_loss": 0.5417414903640747, "rewards/accuracies": 1.0, "rewards/chosen": -0.1473754346370697, "rewards/margins": 0.029853537678718567, "rewards/rejected": -0.17722897231578827, "step": 310 }, { "epoch": 0.8200395517468688, "grad_norm": 10.67618465423584, "learning_rate": 5.863354037267081e-06, "log_odds_chosen": 0.2837017774581909, "log_odds_ratio": -0.570830225944519, "logits/chosen": -1.1645097732543945, "logits/rejected": -1.0463659763336182, "logps/chosen": -1.5850422382354736, "logps/rejected": -1.8165497779846191, "loss": 3.2837, "nll_loss": 0.7638373970985413, "rewards/accuracies": 0.875, "rewards/chosen": -0.15850421786308289, "rewards/margins": 0.02315075509250164, "rewards/rejected": -0.18165498971939087, "step": 311 }, { "epoch": 0.8226763348714569, "grad_norm": 9.986859321594238, "learning_rate": 5.856255545696539e-06, "log_odds_chosen": 0.4877340793609619, "log_odds_ratio": -0.4837613105773926, "logits/chosen": -1.0402659177780151, "logits/rejected": -0.9313653707504272, "logps/chosen": -1.4441670179367065, "logps/rejected": -1.829864263534546, "loss": 2.2893, "nll_loss": 0.523948609828949, "rewards/accuracies": 1.0, "rewards/chosen": -0.1444167047739029, "rewards/margins": 0.03856972977519035, "rewards/rejected": -0.18298642337322235, "step": 312 }, { "epoch": 0.8253131179960448, "grad_norm": 10.904727935791016, "learning_rate": 5.849157054125998e-06, "log_odds_chosen": 0.39935219287872314, "log_odds_ratio": -0.5432575345039368, "logits/chosen": -1.1372846364974976, "logits/rejected": -1.0501521825790405, "logps/chosen": -1.5840697288513184, "logps/rejected": -1.9227668046951294, "loss": 3.1338, "nll_loss": 0.7291156053543091, "rewards/accuracies": 0.75, "rewards/chosen": -0.15840697288513184, "rewards/margins": 0.033869706094264984, "rewards/rejected": -0.19227667152881622, "step": 313 }, { "epoch": 0.8279499011206328, "grad_norm": 10.428577423095703, "learning_rate": 5.842058562555457e-06, "log_odds_chosen": 0.39743325114250183, "log_odds_ratio": -0.5214748382568359, "logits/chosen": -0.9990111589431763, "logits/rejected": -0.9133065938949585, "logps/chosen": -1.6697391271591187, "logps/rejected": -1.9973664283752441, "loss": 2.6977, "nll_loss": 0.6222816109657288, "rewards/accuracies": 0.875, "rewards/chosen": -0.16697391867637634, "rewards/margins": 0.032762713730335236, "rewards/rejected": -0.19973662495613098, "step": 314 }, { "epoch": 0.8305866842452209, "grad_norm": 10.464115142822266, "learning_rate": 5.834960070984915e-06, "log_odds_chosen": 0.3513261377811432, "log_odds_ratio": -0.5399587750434875, "logits/chosen": -1.063493013381958, "logits/rejected": -0.9799022078514099, "logps/chosen": -1.6098552942276, "logps/rejected": -1.8995361328125, "loss": 2.7644, "nll_loss": 0.637109637260437, "rewards/accuracies": 1.0, "rewards/chosen": -0.16098552942276, "rewards/margins": 0.028968090191483498, "rewards/rejected": -0.18995361030101776, "step": 315 }, { "epoch": 0.8332234673698088, "grad_norm": 10.210542678833008, "learning_rate": 5.827861579414375e-06, "log_odds_chosen": 0.057153940200805664, "log_odds_ratio": -0.6794668436050415, "logits/chosen": -1.0624723434448242, "logits/rejected": -0.9977174997329712, "logps/chosen": -1.5775511264801025, "logps/rejected": -1.6190568208694458, "loss": 2.9337, "nll_loss": 0.6654831767082214, "rewards/accuracies": 0.625, "rewards/chosen": -0.15775510668754578, "rewards/margins": 0.004150571301579475, "rewards/rejected": -0.1619056761264801, "step": 316 }, { "epoch": 0.8358602504943968, "grad_norm": 10.235295295715332, "learning_rate": 5.820763087843833e-06, "log_odds_chosen": 0.12494079768657684, "log_odds_ratio": -0.645836591720581, "logits/chosen": -1.0902093648910522, "logits/rejected": -1.016379952430725, "logps/chosen": -1.5835883617401123, "logps/rejected": -1.687776803970337, "loss": 2.8211, "nll_loss": 0.6406936645507812, "rewards/accuracies": 0.625, "rewards/chosen": -0.1583588421344757, "rewards/margins": 0.010418838821351528, "rewards/rejected": -0.1687776893377304, "step": 317 }, { "epoch": 0.8384970336189849, "grad_norm": 10.28027629852295, "learning_rate": 5.813664596273292e-06, "log_odds_chosen": 0.343504935503006, "log_odds_ratio": -0.5613068342208862, "logits/chosen": -1.0396804809570312, "logits/rejected": -0.9597609043121338, "logps/chosen": -1.4261085987091064, "logps/rejected": -1.692732572555542, "loss": 2.5255, "nll_loss": 0.5752564072608948, "rewards/accuracies": 0.75, "rewards/chosen": -0.1426108479499817, "rewards/margins": 0.026662401854991913, "rewards/rejected": -0.1692732572555542, "step": 318 }, { "epoch": 0.8411338167435728, "grad_norm": 10.127836227416992, "learning_rate": 5.8065661047027505e-06, "log_odds_chosen": 0.34942787885665894, "log_odds_ratio": -0.541016697883606, "logits/chosen": -1.0738239288330078, "logits/rejected": -0.9604414105415344, "logps/chosen": -1.5903185606002808, "logps/rejected": -1.8677293062210083, "loss": 2.8788, "nll_loss": 0.6655933260917664, "rewards/accuracies": 0.875, "rewards/chosen": -0.15903185307979584, "rewards/margins": 0.02774108201265335, "rewards/rejected": -0.1867729276418686, "step": 319 }, { "epoch": 0.8437705998681608, "grad_norm": 10.790935516357422, "learning_rate": 5.799467613132209e-06, "log_odds_chosen": 0.153792604804039, "log_odds_ratio": -0.6239289045333862, "logits/chosen": -1.0212633609771729, "logits/rejected": -0.9398971199989319, "logps/chosen": -1.6086022853851318, "logps/rejected": -1.7359929084777832, "loss": 2.607, "nll_loss": 0.5893584489822388, "rewards/accuracies": 0.875, "rewards/chosen": -0.16086022555828094, "rewards/margins": 0.012739075347781181, "rewards/rejected": -0.17359930276870728, "step": 320 }, { "epoch": 0.8464073829927489, "grad_norm": 9.60370922088623, "learning_rate": 5.792369121561668e-06, "log_odds_chosen": 0.9587531089782715, "log_odds_ratio": -0.35142982006073, "logits/chosen": -1.0414528846740723, "logits/rejected": -0.9721252918243408, "logps/chosen": -1.310626745223999, "logps/rejected": -2.090373992919922, "loss": 2.1543, "nll_loss": 0.5034200549125671, "rewards/accuracies": 1.0, "rewards/chosen": -0.13106267154216766, "rewards/margins": 0.07797471433877945, "rewards/rejected": -0.20903737843036652, "step": 321 }, { "epoch": 0.8490441661173368, "grad_norm": 9.56372356414795, "learning_rate": 5.785270629991126e-06, "log_odds_chosen": 0.42057231068611145, "log_odds_ratio": -0.5202645659446716, "logits/chosen": -1.0739099979400635, "logits/rejected": -0.9998117685317993, "logps/chosen": -1.427781105041504, "logps/rejected": -1.7579572200775146, "loss": 2.5026, "nll_loss": 0.5736272931098938, "rewards/accuracies": 1.0, "rewards/chosen": -0.14277812838554382, "rewards/margins": 0.033017609268426895, "rewards/rejected": -0.17579573392868042, "step": 322 }, { "epoch": 0.8516809492419248, "grad_norm": 10.817325592041016, "learning_rate": 5.778172138420585e-06, "log_odds_chosen": 0.2748401165008545, "log_odds_ratio": -0.5693619251251221, "logits/chosen": -1.0628693103790283, "logits/rejected": -0.9903963804244995, "logps/chosen": -1.7415344715118408, "logps/rejected": -1.9746755361557007, "loss": 2.8572, "nll_loss": 0.6573653221130371, "rewards/accuracies": 1.0, "rewards/chosen": -0.17415344715118408, "rewards/margins": 0.023314107209444046, "rewards/rejected": -0.19746755063533783, "step": 323 }, { "epoch": 0.8543177323665129, "grad_norm": 10.59162712097168, "learning_rate": 5.771073646850044e-06, "log_odds_chosen": 0.2887657582759857, "log_odds_ratio": -0.5717019438743591, "logits/chosen": -1.0636672973632812, "logits/rejected": -0.9916437864303589, "logps/chosen": -1.5421836376190186, "logps/rejected": -1.7747539281845093, "loss": 2.8713, "nll_loss": 0.6606504917144775, "rewards/accuracies": 0.625, "rewards/chosen": -0.15421836078166962, "rewards/margins": 0.023257026448845863, "rewards/rejected": -0.17747539281845093, "step": 324 }, { "epoch": 0.8569545154911009, "grad_norm": 10.08297348022461, "learning_rate": 5.763975155279503e-06, "log_odds_chosen": 0.43007218837738037, "log_odds_ratio": -0.5519193410873413, "logits/chosen": -1.0398885011672974, "logits/rejected": -0.9652789235115051, "logps/chosen": -1.4443862438201904, "logps/rejected": -1.7982938289642334, "loss": 2.3122, "nll_loss": 0.5228697061538696, "rewards/accuracies": 0.75, "rewards/chosen": -0.14443862438201904, "rewards/margins": 0.03539075329899788, "rewards/rejected": -0.17982937395572662, "step": 325 }, { "epoch": 0.8595912986156888, "grad_norm": 10.488415718078613, "learning_rate": 5.756876663708961e-06, "log_odds_chosen": 0.3969072103500366, "log_odds_ratio": -0.5206924676895142, "logits/chosen": -1.0502766370773315, "logits/rejected": -0.9669512510299683, "logps/chosen": -1.6841930150985718, "logps/rejected": -2.0151259899139404, "loss": 2.9243, "nll_loss": 0.6790080070495605, "rewards/accuracies": 1.0, "rewards/chosen": -0.16841931641101837, "rewards/margins": 0.033093273639678955, "rewards/rejected": -0.20151257514953613, "step": 326 }, { "epoch": 0.8622280817402769, "grad_norm": 10.095006942749023, "learning_rate": 5.749778172138421e-06, "log_odds_chosen": 0.456265926361084, "log_odds_ratio": -0.5051561594009399, "logits/chosen": -1.0466737747192383, "logits/rejected": -0.947853684425354, "logps/chosen": -1.474120020866394, "logps/rejected": -1.8521654605865479, "loss": 2.1415, "nll_loss": 0.4848534166812897, "rewards/accuracies": 1.0, "rewards/chosen": -0.1474120020866394, "rewards/margins": 0.037804536521434784, "rewards/rejected": -0.18521654605865479, "step": 327 }, { "epoch": 0.8648648648648649, "grad_norm": 9.11341667175293, "learning_rate": 5.742679680567879e-06, "log_odds_chosen": 0.3190004229545593, "log_odds_ratio": -0.5560978055000305, "logits/chosen": -0.9748751521110535, "logits/rejected": -0.8502944707870483, "logps/chosen": -1.3002684116363525, "logps/rejected": -1.544651746749878, "loss": 1.8325, "nll_loss": 0.4025198817253113, "rewards/accuracies": 0.875, "rewards/chosen": -0.13002684712409973, "rewards/margins": 0.02443832904100418, "rewards/rejected": -0.15446516871452332, "step": 328 }, { "epoch": 0.8675016479894528, "grad_norm": 10.462133407592773, "learning_rate": 5.735581188997338e-06, "log_odds_chosen": 0.24863064289093018, "log_odds_ratio": -0.5877097249031067, "logits/chosen": -0.9955823421478271, "logits/rejected": -0.9353163838386536, "logps/chosen": -1.693455457687378, "logps/rejected": -1.8955365419387817, "loss": 3.0426, "nll_loss": 0.7018847465515137, "rewards/accuracies": 0.75, "rewards/chosen": -0.16934554278850555, "rewards/margins": 0.020208114758133888, "rewards/rejected": -0.1895536482334137, "step": 329 }, { "epoch": 0.8701384311140409, "grad_norm": 10.17776107788086, "learning_rate": 5.728482697426797e-06, "log_odds_chosen": 0.40002769231796265, "log_odds_ratio": -0.5155510306358337, "logits/chosen": -1.1227819919586182, "logits/rejected": -0.9743717908859253, "logps/chosen": -1.4541687965393066, "logps/rejected": -1.7705214023590088, "loss": 2.7577, "nll_loss": 0.6378784775733948, "rewards/accuracies": 1.0, "rewards/chosen": -0.14541688561439514, "rewards/margins": 0.03163526952266693, "rewards/rejected": -0.17705215513706207, "step": 330 }, { "epoch": 0.8727752142386289, "grad_norm": 10.216160774230957, "learning_rate": 5.721384205856255e-06, "log_odds_chosen": 0.5271902084350586, "log_odds_ratio": -0.4871942102909088, "logits/chosen": -0.994499921798706, "logits/rejected": -0.9453480243682861, "logps/chosen": -1.3821659088134766, "logps/rejected": -1.7877823114395142, "loss": 2.3179, "nll_loss": 0.5307646989822388, "rewards/accuracies": 0.875, "rewards/chosen": -0.13821661472320557, "rewards/margins": 0.04056163504719734, "rewards/rejected": -0.17877823114395142, "step": 331 }, { "epoch": 0.8754119973632168, "grad_norm": 10.18932819366455, "learning_rate": 5.7142857142857145e-06, "log_odds_chosen": 0.5570278167724609, "log_odds_ratio": -0.4699929654598236, "logits/chosen": -1.0728272199630737, "logits/rejected": -0.9810482263565063, "logps/chosen": -1.4654444456100464, "logps/rejected": -1.9071202278137207, "loss": 2.6198, "nll_loss": 0.6079585552215576, "rewards/accuracies": 1.0, "rewards/chosen": -0.1465444564819336, "rewards/margins": 0.044167570769786835, "rewards/rejected": -0.19071203470230103, "step": 332 }, { "epoch": 0.8780487804878049, "grad_norm": 10.48474407196045, "learning_rate": 5.7071872227151726e-06, "log_odds_chosen": 0.3994866907596588, "log_odds_ratio": -0.5195596814155579, "logits/chosen": -1.068738579750061, "logits/rejected": -1.0077134370803833, "logps/chosen": -1.590066909790039, "logps/rejected": -1.9161949157714844, "loss": 3.0394, "nll_loss": 0.7078840732574463, "rewards/accuracies": 1.0, "rewards/chosen": -0.15900669991970062, "rewards/margins": 0.03261279687285423, "rewards/rejected": -0.19161948561668396, "step": 333 }, { "epoch": 0.8806855636123929, "grad_norm": 10.939852714538574, "learning_rate": 5.7000887311446315e-06, "log_odds_chosen": 0.4621232748031616, "log_odds_ratio": -0.5040398240089417, "logits/chosen": -1.11872398853302, "logits/rejected": -0.9806749820709229, "logps/chosen": -1.4889421463012695, "logps/rejected": -1.8606245517730713, "loss": 2.736, "nll_loss": 0.6336002945899963, "rewards/accuracies": 0.875, "rewards/chosen": -0.14889422059059143, "rewards/margins": 0.0371682345867157, "rewards/rejected": -0.18606245517730713, "step": 334 }, { "epoch": 0.8833223467369808, "grad_norm": 10.190740585327148, "learning_rate": 5.69299023957409e-06, "log_odds_chosen": 0.37728533148765564, "log_odds_ratio": -0.5308787226676941, "logits/chosen": -1.0650832653045654, "logits/rejected": -0.9698933362960815, "logps/chosen": -1.6103969812393188, "logps/rejected": -1.924202561378479, "loss": 2.8435, "nll_loss": 0.6577939987182617, "rewards/accuracies": 0.875, "rewards/chosen": -0.16103971004486084, "rewards/margins": 0.03138056397438049, "rewards/rejected": -0.19242027401924133, "step": 335 }, { "epoch": 0.8859591298615689, "grad_norm": 10.346920013427734, "learning_rate": 5.685891748003549e-06, "log_odds_chosen": 0.5905814170837402, "log_odds_ratio": -0.4763306975364685, "logits/chosen": -1.0905077457427979, "logits/rejected": -0.9927341938018799, "logps/chosen": -1.4622387886047363, "logps/rejected": -1.9198663234710693, "loss": 2.724, "nll_loss": 0.6333789229393005, "rewards/accuracies": 1.0, "rewards/chosen": -0.14622387290000916, "rewards/margins": 0.04576274752616882, "rewards/rejected": -0.19198662042617798, "step": 336 }, { "epoch": 0.8885959129861569, "grad_norm": 9.778738021850586, "learning_rate": 5.678793256433007e-06, "log_odds_chosen": 0.3346019983291626, "log_odds_ratio": -0.547289252281189, "logits/chosen": -1.0419234037399292, "logits/rejected": -0.9784678816795349, "logps/chosen": -1.5617038011550903, "logps/rejected": -1.835525393486023, "loss": 2.445, "nll_loss": 0.556515634059906, "rewards/accuracies": 1.0, "rewards/chosen": -0.15617038309574127, "rewards/margins": 0.027382152155041695, "rewards/rejected": -0.18355253338813782, "step": 337 }, { "epoch": 0.8912326961107448, "grad_norm": 10.788568496704102, "learning_rate": 5.671694764862467e-06, "log_odds_chosen": 0.4987928867340088, "log_odds_ratio": -0.49477386474609375, "logits/chosen": -1.0442382097244263, "logits/rejected": -0.9384560585021973, "logps/chosen": -1.481365442276001, "logps/rejected": -1.8602831363677979, "loss": 2.8371, "nll_loss": 0.6598080992698669, "rewards/accuracies": 1.0, "rewards/chosen": -0.14813652634620667, "rewards/margins": 0.03789178282022476, "rewards/rejected": -0.18602833151817322, "step": 338 }, { "epoch": 0.8938694792353329, "grad_norm": 9.854400634765625, "learning_rate": 5.664596273291925e-06, "log_odds_chosen": 0.4089469909667969, "log_odds_ratio": -0.514413058757782, "logits/chosen": -1.0501763820648193, "logits/rejected": -0.9793403148651123, "logps/chosen": -1.3807724714279175, "logps/rejected": -1.6960806846618652, "loss": 2.5394, "nll_loss": 0.583407461643219, "rewards/accuracies": 1.0, "rewards/chosen": -0.1380772590637207, "rewards/margins": 0.03153081610798836, "rewards/rejected": -0.16960807144641876, "step": 339 }, { "epoch": 0.8965062623599209, "grad_norm": 10.947417259216309, "learning_rate": 5.657497781721383e-06, "log_odds_chosen": 0.44161561131477356, "log_odds_ratio": -0.5027471780776978, "logits/chosen": -1.0611671209335327, "logits/rejected": -0.9183800220489502, "logps/chosen": -1.4360787868499756, "logps/rejected": -1.7909188270568848, "loss": 2.8081, "nll_loss": 0.6517510414123535, "rewards/accuracies": 1.0, "rewards/chosen": -0.14360786974430084, "rewards/margins": 0.03548401594161987, "rewards/rejected": -0.17909188568592072, "step": 340 }, { "epoch": 0.8991430454845089, "grad_norm": 10.07443618774414, "learning_rate": 5.650399290150843e-06, "log_odds_chosen": 0.2103627473115921, "log_odds_ratio": -0.5991295576095581, "logits/chosen": -1.0860145092010498, "logits/rejected": -1.0008509159088135, "logps/chosen": -1.425404667854309, "logps/rejected": -1.5859260559082031, "loss": 2.7957, "nll_loss": 0.6390042304992676, "rewards/accuracies": 0.875, "rewards/chosen": -0.14254045486450195, "rewards/margins": 0.016052136197686195, "rewards/rejected": -0.1585926115512848, "step": 341 }, { "epoch": 0.9017798286090969, "grad_norm": 10.20143985748291, "learning_rate": 5.643300798580301e-06, "log_odds_chosen": 0.4558635950088501, "log_odds_ratio": -0.5051071643829346, "logits/chosen": -1.1710106134414673, "logits/rejected": -1.0035669803619385, "logps/chosen": -1.4524896144866943, "logps/rejected": -1.8239222764968872, "loss": 3.073, "nll_loss": 0.717727780342102, "rewards/accuracies": 0.875, "rewards/chosen": -0.14524896442890167, "rewards/margins": 0.0371432825922966, "rewards/rejected": -0.18239223957061768, "step": 342 }, { "epoch": 0.9044166117336849, "grad_norm": 9.72130012512207, "learning_rate": 5.636202307009761e-06, "log_odds_chosen": 0.47912660241127014, "log_odds_ratio": -0.48404398560523987, "logits/chosen": -1.0630624294281006, "logits/rejected": -0.9384533166885376, "logps/chosen": -1.3418054580688477, "logps/rejected": -1.7115063667297363, "loss": 2.3891, "nll_loss": 0.5488689541816711, "rewards/accuracies": 1.0, "rewards/chosen": -0.13418054580688477, "rewards/margins": 0.03697009012103081, "rewards/rejected": -0.17115065455436707, "step": 343 }, { "epoch": 0.9070533948582729, "grad_norm": 10.672527313232422, "learning_rate": 5.629103815439219e-06, "log_odds_chosen": 0.4753820300102234, "log_odds_ratio": -0.4976285994052887, "logits/chosen": -1.090612769126892, "logits/rejected": -0.9278324842453003, "logps/chosen": -1.5014450550079346, "logps/rejected": -1.8953273296356201, "loss": 2.7992, "nll_loss": 0.6500457525253296, "rewards/accuracies": 1.0, "rewards/chosen": -0.1501445174217224, "rewards/margins": 0.03938821703195572, "rewards/rejected": -0.18953272700309753, "step": 344 }, { "epoch": 0.9096901779828609, "grad_norm": 10.501937866210938, "learning_rate": 5.622005323868678e-06, "log_odds_chosen": 0.3252415359020233, "log_odds_ratio": -0.5536338686943054, "logits/chosen": -1.1071209907531738, "logits/rejected": -1.0367724895477295, "logps/chosen": -1.341892123222351, "logps/rejected": -1.5972182750701904, "loss": 2.475, "nll_loss": 0.5633936524391174, "rewards/accuracies": 1.0, "rewards/chosen": -0.13418921828269958, "rewards/margins": 0.02553262561559677, "rewards/rejected": -0.15972182154655457, "step": 345 }, { "epoch": 0.9123269611074489, "grad_norm": 10.16054630279541, "learning_rate": 5.6149068322981366e-06, "log_odds_chosen": 0.27317702770233154, "log_odds_ratio": -0.5727741718292236, "logits/chosen": -1.0333210229873657, "logits/rejected": -0.957564651966095, "logps/chosen": -1.5706524848937988, "logps/rejected": -1.7979657649993896, "loss": 3.005, "nll_loss": 0.6939662098884583, "rewards/accuracies": 0.875, "rewards/chosen": -0.1570652574300766, "rewards/margins": 0.022731315344572067, "rewards/rejected": -0.17979657649993896, "step": 346 }, { "epoch": 0.914963744232037, "grad_norm": 9.568496704101562, "learning_rate": 5.6078083407275955e-06, "log_odds_chosen": 0.40468311309814453, "log_odds_ratio": -0.5176348090171814, "logits/chosen": -1.1225817203521729, "logits/rejected": -1.0085132122039795, "logps/chosen": -1.4263901710510254, "logps/rejected": -1.7484097480773926, "loss": 2.6263, "nll_loss": 0.6048138737678528, "rewards/accuracies": 1.0, "rewards/chosen": -0.14263901114463806, "rewards/margins": 0.03220197558403015, "rewards/rejected": -0.1748409867286682, "step": 347 }, { "epoch": 0.9176005273566249, "grad_norm": 9.960814476013184, "learning_rate": 5.6007098491570535e-06, "log_odds_chosen": 0.3930380344390869, "log_odds_ratio": -0.5187526345252991, "logits/chosen": -1.0023831129074097, "logits/rejected": -0.8820410370826721, "logps/chosen": -1.4457470178604126, "logps/rejected": -1.7578613758087158, "loss": 2.2526, "nll_loss": 0.5112727284431458, "rewards/accuracies": 1.0, "rewards/chosen": -0.14457471668720245, "rewards/margins": 0.031211430206894875, "rewards/rejected": -0.17578613758087158, "step": 348 }, { "epoch": 0.9202373104812129, "grad_norm": 10.322500228881836, "learning_rate": 5.593611357586512e-06, "log_odds_chosen": 0.2274288386106491, "log_odds_ratio": -0.5969381332397461, "logits/chosen": -1.02559494972229, "logits/rejected": -0.922670841217041, "logps/chosen": -1.7443609237670898, "logps/rejected": -1.9361728429794312, "loss": 2.6544, "nll_loss": 0.6039038300514221, "rewards/accuracies": 0.75, "rewards/chosen": -0.17443609237670898, "rewards/margins": 0.019181186333298683, "rewards/rejected": -0.19361728429794312, "step": 349 }, { "epoch": 0.922874093605801, "grad_norm": 9.499584197998047, "learning_rate": 5.586512866015971e-06, "log_odds_chosen": 0.5290266275405884, "log_odds_ratio": -0.48338058590888977, "logits/chosen": -1.0399274826049805, "logits/rejected": -0.9718201160430908, "logps/chosen": -1.174059271812439, "logps/rejected": -1.534996509552002, "loss": 2.229, "nll_loss": 0.5089079737663269, "rewards/accuracies": 1.0, "rewards/chosen": -0.11740593612194061, "rewards/margins": 0.03609371930360794, "rewards/rejected": -0.15349964797496796, "step": 350 }, { "epoch": 0.9255108767303889, "grad_norm": 10.729506492614746, "learning_rate": 5.57941437444543e-06, "log_odds_chosen": 0.26497402787208557, "log_odds_ratio": -0.5773090720176697, "logits/chosen": -1.126983880996704, "logits/rejected": -1.0317203998565674, "logps/chosen": -1.5925211906433105, "logps/rejected": -1.8076058626174927, "loss": 2.9425, "nll_loss": 0.6779011487960815, "rewards/accuracies": 0.75, "rewards/chosen": -0.1592521369457245, "rewards/margins": 0.021508460864424706, "rewards/rejected": -0.18076059222221375, "step": 351 }, { "epoch": 0.9281476598549769, "grad_norm": 10.258927345275879, "learning_rate": 5.572315882874889e-06, "log_odds_chosen": 0.2672571837902069, "log_odds_ratio": -0.5744418501853943, "logits/chosen": -1.1412999629974365, "logits/rejected": -1.0702255964279175, "logps/chosen": -1.3987594842910767, "logps/rejected": -1.6080760955810547, "loss": 2.8588, "nll_loss": 0.6572588086128235, "rewards/accuracies": 1.0, "rewards/chosen": -0.13987594842910767, "rewards/margins": 0.020931649953126907, "rewards/rejected": -0.16080759465694427, "step": 352 }, { "epoch": 0.930784442979565, "grad_norm": 10.075135231018066, "learning_rate": 5.565217391304347e-06, "log_odds_chosen": 0.28277862071990967, "log_odds_ratio": -0.5662992000579834, "logits/chosen": -1.0691959857940674, "logits/rejected": -0.9810045957565308, "logps/chosen": -1.5185537338256836, "logps/rejected": -1.7473249435424805, "loss": 2.8186, "nll_loss": 0.6480088233947754, "rewards/accuracies": 0.875, "rewards/chosen": -0.15185536444187164, "rewards/margins": 0.022877134382724762, "rewards/rejected": -0.174732506275177, "step": 353 }, { "epoch": 0.9334212261041529, "grad_norm": 10.633783340454102, "learning_rate": 5.558118899733807e-06, "log_odds_chosen": 0.672451913356781, "log_odds_ratio": -0.44251322746276855, "logits/chosen": -1.0403993129730225, "logits/rejected": -0.9178810119628906, "logps/chosen": -1.4975465536117554, "logps/rejected": -2.0363898277282715, "loss": 2.5513, "nll_loss": 0.5935852527618408, "rewards/accuracies": 1.0, "rewards/chosen": -0.14975465834140778, "rewards/margins": 0.05388431251049042, "rewards/rejected": -0.2036389708518982, "step": 354 }, { "epoch": 0.9360580092287409, "grad_norm": 10.344921112060547, "learning_rate": 5.551020408163265e-06, "log_odds_chosen": 0.4677634537220001, "log_odds_ratio": -0.4962654411792755, "logits/chosen": -1.047995924949646, "logits/rejected": -0.934207558631897, "logps/chosen": -1.47959566116333, "logps/rejected": -1.8597514629364014, "loss": 2.4314, "nll_loss": 0.5582197904586792, "rewards/accuracies": 1.0, "rewards/chosen": -0.14795956015586853, "rewards/margins": 0.03801558539271355, "rewards/rejected": -0.18597514927387238, "step": 355 }, { "epoch": 0.938694792353329, "grad_norm": 10.867266654968262, "learning_rate": 5.543921916592724e-06, "log_odds_chosen": 0.17453785240650177, "log_odds_ratio": -0.6157727241516113, "logits/chosen": -1.0531580448150635, "logits/rejected": -1.0111100673675537, "logps/chosen": -1.4414186477661133, "logps/rejected": -1.5767408609390259, "loss": 2.5583, "nll_loss": 0.5779985189437866, "rewards/accuracies": 0.75, "rewards/chosen": -0.14414185285568237, "rewards/margins": 0.013532244600355625, "rewards/rejected": -0.15767410397529602, "step": 356 }, { "epoch": 0.9413315754779169, "grad_norm": 9.612220764160156, "learning_rate": 5.536823425022183e-06, "log_odds_chosen": 0.49452072381973267, "log_odds_ratio": -0.4899476170539856, "logits/chosen": -1.0678932666778564, "logits/rejected": -0.9185231924057007, "logps/chosen": -1.3783776760101318, "logps/rejected": -1.7587839365005493, "loss": 2.3197, "nll_loss": 0.5309328436851501, "rewards/accuracies": 0.875, "rewards/chosen": -0.13783776760101318, "rewards/margins": 0.03804062306880951, "rewards/rejected": -0.1758784055709839, "step": 357 }, { "epoch": 0.9439683586025049, "grad_norm": 9.743525505065918, "learning_rate": 5.529724933451641e-06, "log_odds_chosen": 0.6043331027030945, "log_odds_ratio": -0.45537304878234863, "logits/chosen": -1.034125566482544, "logits/rejected": -0.9460897445678711, "logps/chosen": -1.2822744846343994, "logps/rejected": -1.7196097373962402, "loss": 2.1604, "nll_loss": 0.49456602334976196, "rewards/accuracies": 1.0, "rewards/chosen": -0.12822744250297546, "rewards/margins": 0.043733518570661545, "rewards/rejected": -0.1719609498977661, "step": 358 }, { "epoch": 0.946605141727093, "grad_norm": 9.58714771270752, "learning_rate": 5.5226264418811006e-06, "log_odds_chosen": 0.41552817821502686, "log_odds_ratio": -0.5585276484489441, "logits/chosen": -1.0117213726043701, "logits/rejected": -0.9441577792167664, "logps/chosen": -1.4235024452209473, "logps/rejected": -1.7747458219528198, "loss": 2.1996, "nll_loss": 0.49404376745224, "rewards/accuracies": 0.75, "rewards/chosen": -0.14235025644302368, "rewards/margins": 0.035124339163303375, "rewards/rejected": -0.17747458815574646, "step": 359 }, { "epoch": 0.9492419248516809, "grad_norm": 10.443949699401855, "learning_rate": 5.515527950310559e-06, "log_odds_chosen": 0.26815563440322876, "log_odds_ratio": -0.5817369222640991, "logits/chosen": -1.004830002784729, "logits/rejected": -0.9195725321769714, "logps/chosen": -1.612457036972046, "logps/rejected": -1.839476466178894, "loss": 2.6319, "nll_loss": 0.5997951030731201, "rewards/accuracies": 0.75, "rewards/chosen": -0.1612457036972046, "rewards/margins": 0.022701943293213844, "rewards/rejected": -0.18394765257835388, "step": 360 }, { "epoch": 0.951878707976269, "grad_norm": 9.963574409484863, "learning_rate": 5.5084294587400175e-06, "log_odds_chosen": 0.2849772572517395, "log_odds_ratio": -0.5643788576126099, "logits/chosen": -1.0444022417068481, "logits/rejected": -0.9414665699005127, "logps/chosen": -1.5040690898895264, "logps/rejected": -1.7351229190826416, "loss": 2.6506, "nll_loss": 0.6061998605728149, "rewards/accuracies": 0.875, "rewards/chosen": -0.15040689706802368, "rewards/margins": 0.023105382919311523, "rewards/rejected": -0.1735122799873352, "step": 361 }, { "epoch": 0.954515491100857, "grad_norm": 10.363738059997559, "learning_rate": 5.501330967169476e-06, "log_odds_chosen": 0.322379469871521, "log_odds_ratio": -0.5474145412445068, "logits/chosen": -1.1142265796661377, "logits/rejected": -1.0194593667984009, "logps/chosen": -1.393839716911316, "logps/rejected": -1.6361058950424194, "loss": 2.849, "nll_loss": 0.6575071811676025, "rewards/accuracies": 1.0, "rewards/chosen": -0.1393839716911316, "rewards/margins": 0.024226615205407143, "rewards/rejected": -0.16361059248447418, "step": 362 }, { "epoch": 0.9571522742254449, "grad_norm": 9.178751945495605, "learning_rate": 5.494232475598935e-06, "log_odds_chosen": 0.2973731458187103, "log_odds_ratio": -0.5822466611862183, "logits/chosen": -0.9972629547119141, "logits/rejected": -0.9414466619491577, "logps/chosen": -1.4518791437149048, "logps/rejected": -1.6809438467025757, "loss": 2.2369, "nll_loss": 0.501007080078125, "rewards/accuracies": 0.625, "rewards/chosen": -0.14518792927265167, "rewards/margins": 0.022906456142663956, "rewards/rejected": -0.16809438169002533, "step": 363 }, { "epoch": 0.959789057350033, "grad_norm": 10.308340072631836, "learning_rate": 5.487133984028393e-06, "log_odds_chosen": 0.19245854020118713, "log_odds_ratio": -0.6071769595146179, "logits/chosen": -1.0446027517318726, "logits/rejected": -0.9913628697395325, "logps/chosen": -1.4108364582061768, "logps/rejected": -1.5582778453826904, "loss": 2.4827, "nll_loss": 0.5599597692489624, "rewards/accuracies": 0.75, "rewards/chosen": -0.14108365774154663, "rewards/margins": 0.014744145795702934, "rewards/rejected": -0.15582779049873352, "step": 364 }, { "epoch": 0.962425840474621, "grad_norm": 9.615263938903809, "learning_rate": 5.480035492457852e-06, "log_odds_chosen": 0.4360993504524231, "log_odds_ratio": -0.5149202942848206, "logits/chosen": -1.065071940422058, "logits/rejected": -0.9887789487838745, "logps/chosen": -1.297107458114624, "logps/rejected": -1.6414079666137695, "loss": 2.3168, "nll_loss": 0.5277169942855835, "rewards/accuracies": 0.875, "rewards/chosen": -0.12971073389053345, "rewards/margins": 0.03443005681037903, "rewards/rejected": -0.16414080560207367, "step": 365 }, { "epoch": 0.9650626235992089, "grad_norm": 9.44418716430664, "learning_rate": 5.472937000887311e-06, "log_odds_chosen": 0.37738606333732605, "log_odds_ratio": -0.5338386297225952, "logits/chosen": -1.0057923793792725, "logits/rejected": -0.8819607496261597, "logps/chosen": -1.292341947555542, "logps/rejected": -1.5872286558151245, "loss": 2.5454, "nll_loss": 0.5829612612724304, "rewards/accuracies": 0.875, "rewards/chosen": -0.1292341947555542, "rewards/margins": 0.02948867343366146, "rewards/rejected": -0.1587228775024414, "step": 366 }, { "epoch": 0.967699406723797, "grad_norm": 10.713010787963867, "learning_rate": 5.465838509316769e-06, "log_odds_chosen": 0.3017590045928955, "log_odds_ratio": -0.5661972165107727, "logits/chosen": -0.9886905550956726, "logits/rejected": -0.8983290791511536, "logps/chosen": -1.1995577812194824, "logps/rejected": -1.4248850345611572, "loss": 2.631, "nll_loss": 0.6011286973953247, "rewards/accuracies": 0.75, "rewards/chosen": -0.11995577812194824, "rewards/margins": 0.02253272570669651, "rewards/rejected": -0.142488494515419, "step": 367 }, { "epoch": 0.970336189848385, "grad_norm": 9.215208053588867, "learning_rate": 5.458740017746229e-06, "log_odds_chosen": 0.2892542779445648, "log_odds_ratio": -0.5717054009437561, "logits/chosen": -1.0206345319747925, "logits/rejected": -0.9631547927856445, "logps/chosen": -1.4143803119659424, "logps/rejected": -1.6542450189590454, "loss": 2.5335, "nll_loss": 0.576204776763916, "rewards/accuracies": 0.875, "rewards/chosen": -0.14143803715705872, "rewards/margins": 0.02398647367954254, "rewards/rejected": -0.16542451083660126, "step": 368 }, { "epoch": 0.972972972972973, "grad_norm": 10.076217651367188, "learning_rate": 5.451641526175687e-06, "log_odds_chosen": 0.4455964267253876, "log_odds_ratio": -0.5248406529426575, "logits/chosen": -0.9466066360473633, "logits/rejected": -0.8634353876113892, "logps/chosen": -1.264702558517456, "logps/rejected": -1.5656343698501587, "loss": 2.0119, "nll_loss": 0.450481116771698, "rewards/accuracies": 0.75, "rewards/chosen": -0.12647025287151337, "rewards/margins": 0.030093185603618622, "rewards/rejected": -0.15656344592571259, "step": 369 }, { "epoch": 0.975609756097561, "grad_norm": 10.204376220703125, "learning_rate": 5.444543034605147e-06, "log_odds_chosen": 0.6873888373374939, "log_odds_ratio": -0.4200359582901001, "logits/chosen": -1.0495414733886719, "logits/rejected": -0.9574683904647827, "logps/chosen": -1.3910161256790161, "logps/rejected": -1.9476149082183838, "loss": 2.2624, "nll_loss": 0.5236042141914368, "rewards/accuracies": 1.0, "rewards/chosen": -0.13910160958766937, "rewards/margins": 0.05565987899899483, "rewards/rejected": -0.1947614848613739, "step": 370 }, { "epoch": 0.978246539222149, "grad_norm": 9.809714317321777, "learning_rate": 5.437444543034605e-06, "log_odds_chosen": 0.4018175005912781, "log_odds_ratio": -0.5197112560272217, "logits/chosen": -1.0419161319732666, "logits/rejected": -0.9370607137680054, "logps/chosen": -1.5038185119628906, "logps/rejected": -1.8241524696350098, "loss": 2.4621, "nll_loss": 0.5635530352592468, "rewards/accuracies": 0.875, "rewards/chosen": -0.15038184821605682, "rewards/margins": 0.03203340247273445, "rewards/rejected": -0.18241524696350098, "step": 371 }, { "epoch": 0.980883322346737, "grad_norm": 9.597217559814453, "learning_rate": 5.430346051464064e-06, "log_odds_chosen": 0.4155101180076599, "log_odds_ratio": -0.5198583006858826, "logits/chosen": -1.0249592065811157, "logits/rejected": -0.9465623497962952, "logps/chosen": -1.2694138288497925, "logps/rejected": -1.595078945159912, "loss": 2.3738, "nll_loss": 0.5414611101150513, "rewards/accuracies": 0.875, "rewards/chosen": -0.12694138288497925, "rewards/margins": 0.03256651386618614, "rewards/rejected": -0.1595079004764557, "step": 372 }, { "epoch": 0.983520105471325, "grad_norm": 10.037396430969238, "learning_rate": 5.423247559893523e-06, "log_odds_chosen": 0.39195820689201355, "log_odds_ratio": -0.5266801118850708, "logits/chosen": -1.0737645626068115, "logits/rejected": -1.0174031257629395, "logps/chosen": -1.4611886739730835, "logps/rejected": -1.7686667442321777, "loss": 2.699, "nll_loss": 0.6220787763595581, "rewards/accuracies": 1.0, "rewards/chosen": -0.1461188793182373, "rewards/margins": 0.03074781224131584, "rewards/rejected": -0.17686668038368225, "step": 373 }, { "epoch": 0.986156888595913, "grad_norm": 10.649054527282715, "learning_rate": 5.416149068322981e-06, "log_odds_chosen": 0.4070664346218109, "log_odds_ratio": -0.5168882608413696, "logits/chosen": -1.1013745069503784, "logits/rejected": -0.9411362409591675, "logps/chosen": -1.5345265865325928, "logps/rejected": -1.864203691482544, "loss": 2.871, "nll_loss": 0.6660618782043457, "rewards/accuracies": 0.875, "rewards/chosen": -0.15345266461372375, "rewards/margins": 0.0329677015542984, "rewards/rejected": -0.18642036616802216, "step": 374 }, { "epoch": 0.988793671720501, "grad_norm": 10.262837409973145, "learning_rate": 5.4090505767524396e-06, "log_odds_chosen": 0.3801131546497345, "log_odds_ratio": -0.5231006145477295, "logits/chosen": -1.0540034770965576, "logits/rejected": -0.9516990184783936, "logps/chosen": -1.4297250509262085, "logps/rejected": -1.727344036102295, "loss": 2.4712, "nll_loss": 0.5654802322387695, "rewards/accuracies": 1.0, "rewards/chosen": -0.14297249913215637, "rewards/margins": 0.02976190857589245, "rewards/rejected": -0.17273442447185516, "step": 375 }, { "epoch": 0.991430454845089, "grad_norm": 9.602299690246582, "learning_rate": 5.4019520851818985e-06, "log_odds_chosen": 0.41827696561813354, "log_odds_ratio": -0.5190730094909668, "logits/chosen": -1.040710210800171, "logits/rejected": -0.9790891408920288, "logps/chosen": -1.4436545372009277, "logps/rejected": -1.7767916917800903, "loss": 2.3436, "nll_loss": 0.533988356590271, "rewards/accuracies": 0.875, "rewards/chosen": -0.14436544477939606, "rewards/margins": 0.033313728868961334, "rewards/rejected": -0.177679181098938, "step": 376 }, { "epoch": 0.994067237969677, "grad_norm": 10.112712860107422, "learning_rate": 5.394853593611357e-06, "log_odds_chosen": 0.2984105050563812, "log_odds_ratio": -0.5783449411392212, "logits/chosen": -1.05629563331604, "logits/rejected": -0.9793335795402527, "logps/chosen": -1.4983892440795898, "logps/rejected": -1.7358847856521606, "loss": 2.6497, "nll_loss": 0.6046023368835449, "rewards/accuracies": 0.75, "rewards/chosen": -0.14983892440795898, "rewards/margins": 0.023749545216560364, "rewards/rejected": -0.17358846962451935, "step": 377 }, { "epoch": 0.996704021094265, "grad_norm": 10.661310195922852, "learning_rate": 5.3877551020408154e-06, "log_odds_chosen": 0.313271164894104, "log_odds_ratio": -0.5534718632698059, "logits/chosen": -1.0316417217254639, "logits/rejected": -0.9664131999015808, "logps/chosen": -1.574042797088623, "logps/rejected": -1.8318827152252197, "loss": 2.6505, "nll_loss": 0.6072766184806824, "rewards/accuracies": 1.0, "rewards/chosen": -0.15740427374839783, "rewards/margins": 0.025783995166420937, "rewards/rejected": -0.18318825960159302, "step": 378 }, { "epoch": 0.999340804218853, "grad_norm": 10.49639892578125, "learning_rate": 5.380656610470275e-06, "log_odds_chosen": 0.4876595437526703, "log_odds_ratio": -0.48396334052085876, "logits/chosen": -1.0692003965377808, "logits/rejected": -0.942592978477478, "logps/chosen": -1.5492660999298096, "logps/rejected": -1.9446545839309692, "loss": 2.7173, "nll_loss": 0.6309238076210022, "rewards/accuracies": 1.0, "rewards/chosen": -0.154926598072052, "rewards/margins": 0.03953886032104492, "rewards/rejected": -0.19446545839309692, "step": 379 }, { "epoch": 1.0019775873434411, "grad_norm": 11.285173416137695, "learning_rate": 5.373558118899733e-06, "log_odds_chosen": 0.42649227380752563, "log_odds_ratio": -0.51619553565979, "logits/chosen": -1.1701511144638062, "logits/rejected": -1.0790563821792603, "logps/chosen": -1.349685788154602, "logps/rejected": -1.6809329986572266, "loss": 2.7709, "nll_loss": 0.641103982925415, "rewards/accuracies": 1.0, "rewards/chosen": -0.1349685788154602, "rewards/margins": 0.03312472254037857, "rewards/rejected": -0.16809332370758057, "step": 380 }, { "epoch": 1.004614370468029, "grad_norm": 9.188010215759277, "learning_rate": 5.366459627329193e-06, "log_odds_chosen": 0.9289355874061584, "log_odds_ratio": -0.3500681519508362, "logits/chosen": -1.0177814960479736, "logits/rejected": -0.8898583650588989, "logps/chosen": -1.3462495803833008, "logps/rejected": -2.111600399017334, "loss": 2.2798, "nll_loss": 0.5349517464637756, "rewards/accuracies": 1.0, "rewards/chosen": -0.13462495803833008, "rewards/margins": 0.07653509080410004, "rewards/rejected": -0.21116004884243011, "step": 381 }, { "epoch": 1.007251153592617, "grad_norm": 10.165288925170898, "learning_rate": 5.359361135758651e-06, "log_odds_chosen": 0.3176172375679016, "log_odds_ratio": -0.5604002475738525, "logits/chosen": -0.9797457456588745, "logits/rejected": -0.9105274677276611, "logps/chosen": -1.435107946395874, "logps/rejected": -1.694528341293335, "loss": 2.5792, "nll_loss": 0.5887516140937805, "rewards/accuracies": 0.75, "rewards/chosen": -0.14351078867912292, "rewards/margins": 0.025942042469978333, "rewards/rejected": -0.16945281624794006, "step": 382 }, { "epoch": 1.0098879367172051, "grad_norm": 10.067802429199219, "learning_rate": 5.352262644188109e-06, "log_odds_chosen": 0.3345809578895569, "log_odds_ratio": -0.5521736741065979, "logits/chosen": -1.121835708618164, "logits/rejected": -1.015265703201294, "logps/chosen": -1.5804250240325928, "logps/rejected": -1.8588757514953613, "loss": 2.8837, "nll_loss": 0.6657131910324097, "rewards/accuracies": 0.875, "rewards/chosen": -0.15804249048233032, "rewards/margins": 0.027845071628689766, "rewards/rejected": -0.18588756024837494, "step": 383 }, { "epoch": 1.012524719841793, "grad_norm": 9.84504508972168, "learning_rate": 5.345164152617569e-06, "log_odds_chosen": 0.6527172327041626, "log_odds_ratio": -0.4251009225845337, "logits/chosen": -1.0640441179275513, "logits/rejected": -0.9334746599197388, "logps/chosen": -1.3364887237548828, "logps/rejected": -1.8548702001571655, "loss": 2.3608, "nll_loss": 0.5476840734481812, "rewards/accuracies": 1.0, "rewards/chosen": -0.13364887237548828, "rewards/margins": 0.05183815956115723, "rewards/rejected": -0.1854870468378067, "step": 384 }, { "epoch": 1.015161502966381, "grad_norm": 9.216593742370605, "learning_rate": 5.338065661047027e-06, "log_odds_chosen": 0.7035154104232788, "log_odds_ratio": -0.43145692348480225, "logits/chosen": -1.049159288406372, "logits/rejected": -0.9552618861198425, "logps/chosen": -1.4319472312927246, "logps/rejected": -2.0070128440856934, "loss": 2.2116, "nll_loss": 0.5097614526748657, "rewards/accuracies": 1.0, "rewards/chosen": -0.14319473505020142, "rewards/margins": 0.05750656872987747, "rewards/rejected": -0.2007012814283371, "step": 385 }, { "epoch": 1.0177982860909691, "grad_norm": 10.160662651062012, "learning_rate": 5.330967169476486e-06, "log_odds_chosen": 0.41731148958206177, "log_odds_ratio": -0.5152519345283508, "logits/chosen": -1.0017313957214355, "logits/rejected": -0.8921623826026917, "logps/chosen": -1.5558286905288696, "logps/rejected": -1.9011025428771973, "loss": 2.7231, "nll_loss": 0.629248321056366, "rewards/accuracies": 0.875, "rewards/chosen": -0.15558286011219025, "rewards/margins": 0.03452739492058754, "rewards/rejected": -0.19011026620864868, "step": 386 }, { "epoch": 1.020435069215557, "grad_norm": 9.78851318359375, "learning_rate": 5.323868677905945e-06, "log_odds_chosen": 0.28756850957870483, "log_odds_ratio": -0.5681681036949158, "logits/chosen": -1.0233585834503174, "logits/rejected": -0.9709876775741577, "logps/chosen": -1.409879446029663, "logps/rejected": -1.637386441230774, "loss": 2.1324, "nll_loss": 0.47629034519195557, "rewards/accuracies": 0.875, "rewards/chosen": -0.14098794758319855, "rewards/margins": 0.022750694304704666, "rewards/rejected": -0.16373863816261292, "step": 387 }, { "epoch": 1.023071852340145, "grad_norm": 9.475854873657227, "learning_rate": 5.3167701863354036e-06, "log_odds_chosen": 0.43278807401657104, "log_odds_ratio": -0.5204096436500549, "logits/chosen": -1.0105952024459839, "logits/rejected": -0.9548962712287903, "logps/chosen": -1.4009045362472534, "logps/rejected": -1.716213583946228, "loss": 2.1709, "nll_loss": 0.49069300293922424, "rewards/accuracies": 1.0, "rewards/chosen": -0.1400904655456543, "rewards/margins": 0.03153090178966522, "rewards/rejected": -0.17162136733531952, "step": 388 }, { "epoch": 1.0257086354647331, "grad_norm": 9.53403377532959, "learning_rate": 5.3096716947648625e-06, "log_odds_chosen": 0.5109670162200928, "log_odds_ratio": -0.47555267810821533, "logits/chosen": -1.055998682975769, "logits/rejected": -0.8984580636024475, "logps/chosen": -1.4810338020324707, "logps/rejected": -1.8954992294311523, "loss": 2.4187, "nll_loss": 0.5571247935295105, "rewards/accuracies": 1.0, "rewards/chosen": -0.14810338616371155, "rewards/margins": 0.041446536779403687, "rewards/rejected": -0.18954992294311523, "step": 389 }, { "epoch": 1.028345418589321, "grad_norm": 10.100629806518555, "learning_rate": 5.302573203194321e-06, "log_odds_chosen": 0.3726976811885834, "log_odds_ratio": -0.5383884906768799, "logits/chosen": -1.140406847000122, "logits/rejected": -1.0378773212432861, "logps/chosen": -1.490544319152832, "logps/rejected": -1.7971141338348389, "loss": 2.7248, "nll_loss": 0.6273695826530457, "rewards/accuracies": 0.875, "rewards/chosen": -0.14905443787574768, "rewards/margins": 0.03065699152648449, "rewards/rejected": -0.17971143126487732, "step": 390 }, { "epoch": 1.030982201713909, "grad_norm": 10.351038932800293, "learning_rate": 5.2954747116237794e-06, "log_odds_chosen": 0.4708203375339508, "log_odds_ratio": -0.49839675426483154, "logits/chosen": -1.0792633295059204, "logits/rejected": -0.9910233020782471, "logps/chosen": -1.5079857110977173, "logps/rejected": -1.898199200630188, "loss": 2.6497, "nll_loss": 0.6125774383544922, "rewards/accuracies": 1.0, "rewards/chosen": -0.15079857409000397, "rewards/margins": 0.03902135044336319, "rewards/rejected": -0.18981991708278656, "step": 391 }, { "epoch": 1.0336189848384971, "grad_norm": 10.037555694580078, "learning_rate": 5.288376220053238e-06, "log_odds_chosen": 0.3636520802974701, "log_odds_ratio": -0.5394154787063599, "logits/chosen": -1.0570249557495117, "logits/rejected": -0.9358998537063599, "logps/chosen": -1.3649983406066895, "logps/rejected": -1.6455130577087402, "loss": 2.3773, "nll_loss": 0.5403822064399719, "rewards/accuracies": 0.875, "rewards/chosen": -0.13649982213974, "rewards/margins": 0.028051460161805153, "rewards/rejected": -0.1645512878894806, "step": 392 }, { "epoch": 1.036255767963085, "grad_norm": 10.128984451293945, "learning_rate": 5.281277728482697e-06, "log_odds_chosen": 0.25981563329696655, "log_odds_ratio": -0.578166127204895, "logits/chosen": -1.1017396450042725, "logits/rejected": -0.9830251336097717, "logps/chosen": -1.5177595615386963, "logps/rejected": -1.7209172248840332, "loss": 2.8123, "nll_loss": 0.645270824432373, "rewards/accuracies": 0.75, "rewards/chosen": -0.15177595615386963, "rewards/margins": 0.02031576819717884, "rewards/rejected": -0.17209172248840332, "step": 393 }, { "epoch": 1.038892551087673, "grad_norm": 10.736696243286133, "learning_rate": 5.274179236912155e-06, "log_odds_chosen": 0.42231547832489014, "log_odds_ratio": -0.5113197565078735, "logits/chosen": -1.021410584449768, "logits/rejected": -0.9038841128349304, "logps/chosen": -1.6272623538970947, "logps/rejected": -1.9719369411468506, "loss": 2.9515, "nll_loss": 0.6867390871047974, "rewards/accuracies": 0.875, "rewards/chosen": -0.1627262383699417, "rewards/margins": 0.03446745127439499, "rewards/rejected": -0.1971937119960785, "step": 394 }, { "epoch": 1.0415293342122611, "grad_norm": 10.150442123413086, "learning_rate": 5.267080745341615e-06, "log_odds_chosen": 0.6440749168395996, "log_odds_ratio": -0.43997421860694885, "logits/chosen": -0.9821280837059021, "logits/rejected": -0.9708842039108276, "logps/chosen": -1.400512933731079, "logps/rejected": -1.8895323276519775, "loss": 2.4461, "nll_loss": 0.5675250887870789, "rewards/accuracies": 1.0, "rewards/chosen": -0.14005127549171448, "rewards/margins": 0.04890194535255432, "rewards/rejected": -0.18895323574543, "step": 395 }, { "epoch": 1.044166117336849, "grad_norm": 9.373574256896973, "learning_rate": 5.259982253771073e-06, "log_odds_chosen": 0.5626580119132996, "log_odds_ratio": -0.4574100077152252, "logits/chosen": -1.0475770235061646, "logits/rejected": -1.0139124393463135, "logps/chosen": -1.2695597410202026, "logps/rejected": -1.6981005668640137, "loss": 2.0175, "nll_loss": 0.45862582325935364, "rewards/accuracies": 1.0, "rewards/chosen": -0.12695598602294922, "rewards/margins": 0.04285407438874245, "rewards/rejected": -0.16981005668640137, "step": 396 }, { "epoch": 1.046802900461437, "grad_norm": 9.228339195251465, "learning_rate": 5.252883762200533e-06, "log_odds_chosen": 0.4349876046180725, "log_odds_ratio": -0.5105225443840027, "logits/chosen": -1.016300916671753, "logits/rejected": -0.9554897546768188, "logps/chosen": -1.288040280342102, "logps/rejected": -1.6193768978118896, "loss": 2.2641, "nll_loss": 0.5149763822555542, "rewards/accuracies": 1.0, "rewards/chosen": -0.1288040280342102, "rewards/margins": 0.033133648335933685, "rewards/rejected": -0.1619376838207245, "step": 397 }, { "epoch": 1.0494396835860251, "grad_norm": 9.794448852539062, "learning_rate": 5.245785270629991e-06, "log_odds_chosen": 0.28050416707992554, "log_odds_ratio": -0.5663173198699951, "logits/chosen": -1.0508898496627808, "logits/rejected": -0.9707998037338257, "logps/chosen": -1.4081400632858276, "logps/rejected": -1.6302982568740845, "loss": 2.4217, "nll_loss": 0.5487897396087646, "rewards/accuracies": 0.875, "rewards/chosen": -0.14081400632858276, "rewards/margins": 0.0222158245742321, "rewards/rejected": -0.16302983462810516, "step": 398 }, { "epoch": 1.052076466710613, "grad_norm": 9.469202041625977, "learning_rate": 5.23868677905945e-06, "log_odds_chosen": 0.5130904912948608, "log_odds_ratio": -0.5062092542648315, "logits/chosen": -0.992918074131012, "logits/rejected": -0.9598261117935181, "logps/chosen": -1.3223826885223389, "logps/rejected": -1.7184679508209229, "loss": 2.0811, "nll_loss": 0.46966129541397095, "rewards/accuracies": 0.75, "rewards/chosen": -0.1322382688522339, "rewards/margins": 0.039608534425497055, "rewards/rejected": -0.17184680700302124, "step": 399 }, { "epoch": 1.054713249835201, "grad_norm": 9.868070602416992, "learning_rate": 5.231588287488909e-06, "log_odds_chosen": 0.3694670796394348, "log_odds_ratio": -0.5339500904083252, "logits/chosen": -1.092237949371338, "logits/rejected": -0.9644882678985596, "logps/chosen": -1.4973866939544678, "logps/rejected": -1.7971856594085693, "loss": 2.905, "nll_loss": 0.6728647351264954, "rewards/accuracies": 1.0, "rewards/chosen": -0.14973866939544678, "rewards/margins": 0.029979918152093887, "rewards/rejected": -0.17971858382225037, "step": 400 }, { "epoch": 1.0573500329597891, "grad_norm": 10.309577941894531, "learning_rate": 5.224489795918367e-06, "log_odds_chosen": 0.32514360547065735, "log_odds_ratio": -0.5523920059204102, "logits/chosen": -1.0738779306411743, "logits/rejected": -1.0128540992736816, "logps/chosen": -1.4930551052093506, "logps/rejected": -1.7555242776870728, "loss": 2.9088, "nll_loss": 0.6719701886177063, "rewards/accuracies": 0.875, "rewards/chosen": -0.149305522441864, "rewards/margins": 0.026246918365359306, "rewards/rejected": -0.17555244266986847, "step": 401 }, { "epoch": 1.059986816084377, "grad_norm": 10.44379711151123, "learning_rate": 5.217391304347826e-06, "log_odds_chosen": 0.33604544401168823, "log_odds_ratio": -0.5441054105758667, "logits/chosen": -0.9352221488952637, "logits/rejected": -0.8872804641723633, "logps/chosen": -1.3509907722473145, "logps/rejected": -1.5996346473693848, "loss": 2.6763, "nll_loss": 0.6146520972251892, "rewards/accuracies": 0.875, "rewards/chosen": -0.13509908318519592, "rewards/margins": 0.024864397943019867, "rewards/rejected": -0.1599634736776352, "step": 402 }, { "epoch": 1.062623599208965, "grad_norm": 10.469642639160156, "learning_rate": 5.2102928127772845e-06, "log_odds_chosen": 0.1823788732290268, "log_odds_ratio": -0.6198056936264038, "logits/chosen": -1.0478174686431885, "logits/rejected": -0.9866634607315063, "logps/chosen": -1.7168128490447998, "logps/rejected": -1.8612220287322998, "loss": 2.7923, "nll_loss": 0.636086642742157, "rewards/accuracies": 0.625, "rewards/chosen": -0.17168128490447998, "rewards/margins": 0.014440920203924179, "rewards/rejected": -0.18612220883369446, "step": 403 }, { "epoch": 1.0652603823335531, "grad_norm": 9.132431030273438, "learning_rate": 5.2031943212067434e-06, "log_odds_chosen": 0.4466651380062103, "log_odds_ratio": -0.5053660273551941, "logits/chosen": -1.0298335552215576, "logits/rejected": -0.9548412561416626, "logps/chosen": -1.1939764022827148, "logps/rejected": -1.5169267654418945, "loss": 1.7979, "nll_loss": 0.3989323377609253, "rewards/accuracies": 0.875, "rewards/chosen": -0.11939764022827148, "rewards/margins": 0.03229503333568573, "rewards/rejected": -0.15169267356395721, "step": 404 }, { "epoch": 1.067897165458141, "grad_norm": 10.261722564697266, "learning_rate": 5.1960958296362015e-06, "log_odds_chosen": 0.3869773745536804, "log_odds_ratio": -0.5333517789840698, "logits/chosen": -1.0374103784561157, "logits/rejected": -0.973752498626709, "logps/chosen": -1.4903892278671265, "logps/rejected": -1.7808723449707031, "loss": 2.5851, "nll_loss": 0.5929421782493591, "rewards/accuracies": 0.875, "rewards/chosen": -0.14903892576694489, "rewards/margins": 0.029048318043351173, "rewards/rejected": -0.1780872642993927, "step": 405 }, { "epoch": 1.070533948582729, "grad_norm": 10.340511322021484, "learning_rate": 5.188997338065661e-06, "log_odds_chosen": 0.44721418619155884, "log_odds_ratio": -0.5180215835571289, "logits/chosen": -1.0950462818145752, "logits/rejected": -1.0476313829421997, "logps/chosen": -1.402875542640686, "logps/rejected": -1.748979926109314, "loss": 2.78, "nll_loss": 0.6431941390037537, "rewards/accuracies": 1.0, "rewards/chosen": -0.14028754830360413, "rewards/margins": 0.03461045026779175, "rewards/rejected": -0.17489799857139587, "step": 406 }, { "epoch": 1.0731707317073171, "grad_norm": 9.088520050048828, "learning_rate": 5.181898846495119e-06, "log_odds_chosen": 0.4929713010787964, "log_odds_ratio": -0.48062002658843994, "logits/chosen": -0.9891932606697083, "logits/rejected": -0.9239880442619324, "logps/chosen": -1.1858923435211182, "logps/rejected": -1.5480952262878418, "loss": 1.8229, "nll_loss": 0.4076550602912903, "rewards/accuracies": 1.0, "rewards/chosen": -0.11858922988176346, "rewards/margins": 0.03622030094265938, "rewards/rejected": -0.15480953454971313, "step": 407 }, { "epoch": 1.075807514831905, "grad_norm": 10.639555931091309, "learning_rate": 5.174800354924579e-06, "log_odds_chosen": 0.3192574977874756, "log_odds_ratio": -0.5502256155014038, "logits/chosen": -1.108659029006958, "logits/rejected": -1.0146270990371704, "logps/chosen": -1.526962161064148, "logps/rejected": -1.7815569639205933, "loss": 2.7723, "nll_loss": 0.6380521655082703, "rewards/accuracies": 1.0, "rewards/chosen": -0.15269622206687927, "rewards/margins": 0.025459475815296173, "rewards/rejected": -0.17815569043159485, "step": 408 }, { "epoch": 1.078444297956493, "grad_norm": 9.81585693359375, "learning_rate": 5.167701863354037e-06, "log_odds_chosen": 0.5805134177207947, "log_odds_ratio": -0.45596566796302795, "logits/chosen": -1.0219948291778564, "logits/rejected": -0.9056349396705627, "logps/chosen": -1.3104722499847412, "logps/rejected": -1.7477952241897583, "loss": 2.2402, "nll_loss": 0.5144590735435486, "rewards/accuracies": 0.875, "rewards/chosen": -0.13104721903800964, "rewards/margins": 0.04373229295015335, "rewards/rejected": -0.17477953433990479, "step": 409 }, { "epoch": 1.0810810810810811, "grad_norm": 9.126127243041992, "learning_rate": 5.160603371783495e-06, "log_odds_chosen": 0.7718862891197205, "log_odds_ratio": -0.4180232286453247, "logits/chosen": -1.0102792978286743, "logits/rejected": -0.9463008642196655, "logps/chosen": -1.3646860122680664, "logps/rejected": -2.0148112773895264, "loss": 1.9482, "nll_loss": 0.4452366232872009, "rewards/accuracies": 1.0, "rewards/chosen": -0.13646861910820007, "rewards/margins": 0.06501252204179764, "rewards/rejected": -0.20148113369941711, "step": 410 }, { "epoch": 1.083717864205669, "grad_norm": 9.693203926086426, "learning_rate": 5.153504880212955e-06, "log_odds_chosen": 0.4520252048969269, "log_odds_ratio": -0.5075293779373169, "logits/chosen": -1.0716478824615479, "logits/rejected": -0.9970329999923706, "logps/chosen": -1.3662054538726807, "logps/rejected": -1.7278333902359009, "loss": 2.3542, "nll_loss": 0.5377950668334961, "rewards/accuracies": 0.875, "rewards/chosen": -0.13662053644657135, "rewards/margins": 0.036162812262773514, "rewards/rejected": -0.17278335988521576, "step": 411 }, { "epoch": 1.086354647330257, "grad_norm": 9.425996780395508, "learning_rate": 5.146406388642413e-06, "log_odds_chosen": 0.4897967278957367, "log_odds_ratio": -0.48537391424179077, "logits/chosen": -1.063400149345398, "logits/rejected": -0.960479736328125, "logps/chosen": -1.388871669769287, "logps/rejected": -1.7831190824508667, "loss": 2.3116, "nll_loss": 0.5293655395507812, "rewards/accuracies": 1.0, "rewards/chosen": -0.1388871669769287, "rewards/margins": 0.03942474350333214, "rewards/rejected": -0.17831191420555115, "step": 412 }, { "epoch": 1.0889914304548451, "grad_norm": 10.309266090393066, "learning_rate": 5.139307897071872e-06, "log_odds_chosen": 0.271070271730423, "log_odds_ratio": -0.5740872025489807, "logits/chosen": -1.0737278461456299, "logits/rejected": -0.9297885298728943, "logps/chosen": -1.3509784936904907, "logps/rejected": -1.5514028072357178, "loss": 2.6739, "nll_loss": 0.6110659837722778, "rewards/accuracies": 0.875, "rewards/chosen": -0.13509784638881683, "rewards/margins": 0.020042438060045242, "rewards/rejected": -0.15514028072357178, "step": 413 }, { "epoch": 1.091628213579433, "grad_norm": 9.698198318481445, "learning_rate": 5.132209405501331e-06, "log_odds_chosen": 0.40453335642814636, "log_odds_ratio": -0.5244268774986267, "logits/chosen": -1.020873785018921, "logits/rejected": -0.9422011971473694, "logps/chosen": -1.3256057500839233, "logps/rejected": -1.645374059677124, "loss": 2.1306, "nll_loss": 0.48021090030670166, "rewards/accuracies": 0.875, "rewards/chosen": -0.1325605809688568, "rewards/margins": 0.031976841390132904, "rewards/rejected": -0.16453741490840912, "step": 414 }, { "epoch": 1.094264996704021, "grad_norm": 9.672815322875977, "learning_rate": 5.12511091393079e-06, "log_odds_chosen": 0.5226827263832092, "log_odds_ratio": -0.47890496253967285, "logits/chosen": -1.0159393548965454, "logits/rejected": -0.957647979259491, "logps/chosen": -1.376617431640625, "logps/rejected": -1.7834666967391968, "loss": 2.3519, "nll_loss": 0.5400886535644531, "rewards/accuracies": 1.0, "rewards/chosen": -0.13766175508499146, "rewards/margins": 0.04068492725491524, "rewards/rejected": -0.1783466637134552, "step": 415 }, { "epoch": 1.0969017798286091, "grad_norm": 10.251235961914062, "learning_rate": 5.118012422360248e-06, "log_odds_chosen": 0.5625154376029968, "log_odds_ratio": -0.4570402503013611, "logits/chosen": -1.0735247135162354, "logits/rejected": -0.9383422136306763, "logps/chosen": -1.4990665912628174, "logps/rejected": -1.9590812921524048, "loss": 2.7048, "nll_loss": 0.6304997801780701, "rewards/accuracies": 1.0, "rewards/chosen": -0.14990666508674622, "rewards/margins": 0.04600147530436516, "rewards/rejected": -0.19590812921524048, "step": 416 }, { "epoch": 1.099538562953197, "grad_norm": 9.985235214233398, "learning_rate": 5.1109139307897074e-06, "log_odds_chosen": 0.4486045837402344, "log_odds_ratio": -0.5067667365074158, "logits/chosen": -1.0886329412460327, "logits/rejected": -1.0065972805023193, "logps/chosen": -1.4825245141983032, "logps/rejected": -1.8411731719970703, "loss": 2.6359, "nll_loss": 0.6083040833473206, "rewards/accuracies": 1.0, "rewards/chosen": -0.1482524573802948, "rewards/margins": 0.03586485981941223, "rewards/rejected": -0.18411731719970703, "step": 417 }, { "epoch": 1.102175346077785, "grad_norm": 10.900262832641602, "learning_rate": 5.1038154392191655e-06, "log_odds_chosen": 0.35260820388793945, "log_odds_ratio": -0.5387884974479675, "logits/chosen": -1.152395248413086, "logits/rejected": -1.0343936681747437, "logps/chosen": -1.4348869323730469, "logps/rejected": -1.7173516750335693, "loss": 2.9281, "nll_loss": 0.6781498789787292, "rewards/accuracies": 0.875, "rewards/chosen": -0.14348870515823364, "rewards/margins": 0.028246475383639336, "rewards/rejected": -0.17173516750335693, "step": 418 }, { "epoch": 1.1048121292023731, "grad_norm": 9.87543773651123, "learning_rate": 5.096716947648624e-06, "log_odds_chosen": 0.5148847699165344, "log_odds_ratio": -0.47807198762893677, "logits/chosen": -1.1029891967773438, "logits/rejected": -0.9355456233024597, "logps/chosen": -1.3388253450393677, "logps/rejected": -1.7494006156921387, "loss": 2.4237, "nll_loss": 0.5581195950508118, "rewards/accuracies": 1.0, "rewards/chosen": -0.133882537484169, "rewards/margins": 0.0410575233399868, "rewards/rejected": -0.1749400645494461, "step": 419 }, { "epoch": 1.107448912326961, "grad_norm": 10.111366271972656, "learning_rate": 5.089618456078083e-06, "log_odds_chosen": 0.6326606273651123, "log_odds_ratio": -0.4852484464645386, "logits/chosen": -1.0890178680419922, "logits/rejected": -0.988831639289856, "logps/chosen": -1.4711456298828125, "logps/rejected": -2.0076844692230225, "loss": 2.8792, "nll_loss": 0.6712836623191833, "rewards/accuracies": 1.0, "rewards/chosen": -0.1471145749092102, "rewards/margins": 0.05365390330553055, "rewards/rejected": -0.20076845586299896, "step": 420 }, { "epoch": 1.110085695451549, "grad_norm": 9.26616096496582, "learning_rate": 5.082519964507541e-06, "log_odds_chosen": 0.5602860450744629, "log_odds_ratio": -0.46852120757102966, "logits/chosen": -1.087837815284729, "logits/rejected": -1.003281831741333, "logps/chosen": -1.235466718673706, "logps/rejected": -1.6632263660430908, "loss": 2.1426, "nll_loss": 0.48879703879356384, "rewards/accuracies": 0.875, "rewards/chosen": -0.12354665994644165, "rewards/margins": 0.042775969952344894, "rewards/rejected": -0.16632264852523804, "step": 421 }, { "epoch": 1.1127224785761372, "grad_norm": 10.583718299865723, "learning_rate": 5.075421472937001e-06, "log_odds_chosen": 0.35559818148612976, "log_odds_ratio": -0.5407001376152039, "logits/chosen": -0.9944896697998047, "logits/rejected": -0.9455467462539673, "logps/chosen": -1.3642079830169678, "logps/rejected": -1.643673062324524, "loss": 2.3662, "nll_loss": 0.5374903678894043, "rewards/accuracies": 0.875, "rewards/chosen": -0.1364208161830902, "rewards/margins": 0.027946488931775093, "rewards/rejected": -0.16436730325222015, "step": 422 }, { "epoch": 1.115359261700725, "grad_norm": 9.83077335357666, "learning_rate": 5.068322981366459e-06, "log_odds_chosen": 0.37923258543014526, "log_odds_ratio": -0.5372962951660156, "logits/chosen": -1.0408728122711182, "logits/rejected": -1.0057107210159302, "logps/chosen": -1.2983362674713135, "logps/rejected": -1.584801435470581, "loss": 2.2054, "nll_loss": 0.49762964248657227, "rewards/accuracies": 0.75, "rewards/chosen": -0.1298336386680603, "rewards/margins": 0.028646504506468773, "rewards/rejected": -0.15848013758659363, "step": 423 }, { "epoch": 1.117996044825313, "grad_norm": 9.340259552001953, "learning_rate": 5.061224489795918e-06, "log_odds_chosen": 0.4945228397846222, "log_odds_ratio": -0.4810750484466553, "logits/chosen": -0.9996337294578552, "logits/rejected": -0.9505899548530579, "logps/chosen": -1.3723878860473633, "logps/rejected": -1.766256332397461, "loss": 2.0538, "nll_loss": 0.46534520387649536, "rewards/accuracies": 1.0, "rewards/chosen": -0.13723880052566528, "rewards/margins": 0.03938683122396469, "rewards/rejected": -0.17662563920021057, "step": 424 }, { "epoch": 1.1206328279499012, "grad_norm": 9.9264554977417, "learning_rate": 5.054125998225377e-06, "log_odds_chosen": 0.23201516270637512, "log_odds_ratio": -0.5995275378227234, "logits/chosen": -1.0680444240570068, "logits/rejected": -1.009355068206787, "logps/chosen": -1.3498444557189941, "logps/rejected": -1.5279866456985474, "loss": 2.3752, "nll_loss": 0.5338517427444458, "rewards/accuracies": 0.75, "rewards/chosen": -0.13498443365097046, "rewards/margins": 0.017814233899116516, "rewards/rejected": -0.15279868245124817, "step": 425 }, { "epoch": 1.123269611074489, "grad_norm": 10.826032638549805, "learning_rate": 5.047027506654836e-06, "log_odds_chosen": 0.28324422240257263, "log_odds_ratio": -0.5778370499610901, "logits/chosen": -1.1000683307647705, "logits/rejected": -0.9509692788124084, "logps/chosen": -1.635487675666809, "logps/rejected": -1.8743829727172852, "loss": 3.1423, "nll_loss": 0.7277827262878418, "rewards/accuracies": 0.875, "rewards/chosen": -0.1635487675666809, "rewards/margins": 0.023889539763331413, "rewards/rejected": -0.18743830919265747, "step": 426 }, { "epoch": 1.125906394199077, "grad_norm": 9.88503646850586, "learning_rate": 5.039929015084295e-06, "log_odds_chosen": 0.6358194351196289, "log_odds_ratio": -0.4514765739440918, "logits/chosen": -0.9731795191764832, "logits/rejected": -0.9241248369216919, "logps/chosen": -1.6356950998306274, "logps/rejected": -2.1824707984924316, "loss": 2.0791, "nll_loss": 0.4746202826499939, "rewards/accuracies": 0.875, "rewards/chosen": -0.16356950998306274, "rewards/margins": 0.05467755347490311, "rewards/rejected": -0.21824705600738525, "step": 427 }, { "epoch": 1.1285431773236652, "grad_norm": 10.21439266204834, "learning_rate": 5.032830523513753e-06, "log_odds_chosen": 0.4135708510875702, "log_odds_ratio": -0.5264768600463867, "logits/chosen": -1.0862725973129272, "logits/rejected": -1.0438268184661865, "logps/chosen": -1.42360520362854, "logps/rejected": -1.7432069778442383, "loss": 2.8331, "nll_loss": 0.6556384563446045, "rewards/accuracies": 0.875, "rewards/chosen": -0.14236053824424744, "rewards/margins": 0.031960174441337585, "rewards/rejected": -0.17432071268558502, "step": 428 }, { "epoch": 1.131179960448253, "grad_norm": 10.110530853271484, "learning_rate": 5.025732031943212e-06, "log_odds_chosen": 0.3847730755805969, "log_odds_ratio": -0.5318898558616638, "logits/chosen": -1.0231150388717651, "logits/rejected": -0.8973355293273926, "logps/chosen": -1.4105229377746582, "logps/rejected": -1.7216198444366455, "loss": 2.1062, "nll_loss": 0.4733680486679077, "rewards/accuracies": 1.0, "rewards/chosen": -0.14105229079723358, "rewards/margins": 0.031109701842069626, "rewards/rejected": -0.1721619963645935, "step": 429 }, { "epoch": 1.133816743572841, "grad_norm": 9.565515518188477, "learning_rate": 5.018633540372671e-06, "log_odds_chosen": 0.507100522518158, "log_odds_ratio": -0.5001344680786133, "logits/chosen": -1.053093671798706, "logits/rejected": -1.0269943475723267, "logps/chosen": -1.353144884109497, "logps/rejected": -1.7529642581939697, "loss": 2.6367, "nll_loss": 0.6091610193252563, "rewards/accuracies": 1.0, "rewards/chosen": -0.13531449437141418, "rewards/margins": 0.039981938898563385, "rewards/rejected": -0.17529642581939697, "step": 430 }, { "epoch": 1.1364535266974292, "grad_norm": 10.121060371398926, "learning_rate": 5.0115350488021295e-06, "log_odds_chosen": 0.4973146617412567, "log_odds_ratio": -0.4870888590812683, "logits/chosen": -1.0365742444992065, "logits/rejected": -0.9439826607704163, "logps/chosen": -1.552242398262024, "logps/rejected": -1.9457755088806152, "loss": 2.5115, "nll_loss": 0.5791730284690857, "rewards/accuracies": 1.0, "rewards/chosen": -0.1552242487668991, "rewards/margins": 0.039353299885988235, "rewards/rejected": -0.19457754492759705, "step": 431 }, { "epoch": 1.139090309822017, "grad_norm": 10.338253021240234, "learning_rate": 5.0044365572315875e-06, "log_odds_chosen": 0.5865770578384399, "log_odds_ratio": -0.5449586510658264, "logits/chosen": -0.9849517941474915, "logits/rejected": -0.8847633600234985, "logps/chosen": -1.4991849660873413, "logps/rejected": -2.0054585933685303, "loss": 2.8563, "nll_loss": 0.6595792770385742, "rewards/accuracies": 0.5, "rewards/chosen": -0.14991851150989532, "rewards/margins": 0.05062737315893173, "rewards/rejected": -0.20054587721824646, "step": 432 }, { "epoch": 1.1417270929466052, "grad_norm": 9.96427059173584, "learning_rate": 4.997338065661047e-06, "log_odds_chosen": 0.4820500910282135, "log_odds_ratio": -0.49063819646835327, "logits/chosen": -1.0988550186157227, "logits/rejected": -0.9819570183753967, "logps/chosen": -1.3548251390457153, "logps/rejected": -1.744004487991333, "loss": 2.5723, "nll_loss": 0.5940203666687012, "rewards/accuracies": 0.875, "rewards/chosen": -0.13548250496387482, "rewards/margins": 0.03891792893409729, "rewards/rejected": -0.1744004487991333, "step": 433 }, { "epoch": 1.1443638760711932, "grad_norm": 10.090310096740723, "learning_rate": 4.990239574090505e-06, "log_odds_chosen": 0.4672353267669678, "log_odds_ratio": -0.4982440173625946, "logits/chosen": -1.1151236295700073, "logits/rejected": -1.002767562866211, "logps/chosen": -1.3489617109298706, "logps/rejected": -1.7005971670150757, "loss": 2.4581, "nll_loss": 0.564690351486206, "rewards/accuracies": 0.875, "rewards/chosen": -0.1348961740732193, "rewards/margins": 0.035163555294275284, "rewards/rejected": -0.17005972564220428, "step": 434 }, { "epoch": 1.147000659195781, "grad_norm": 10.10417652130127, "learning_rate": 4.983141082519965e-06, "log_odds_chosen": 0.4814032316207886, "log_odds_ratio": -0.5029208064079285, "logits/chosen": -1.0239670276641846, "logits/rejected": -0.9829295873641968, "logps/chosen": -1.3184508085250854, "logps/rejected": -1.6729843616485596, "loss": 2.0892, "nll_loss": 0.4720097780227661, "rewards/accuracies": 0.75, "rewards/chosen": -0.13184508681297302, "rewards/margins": 0.035453349351882935, "rewards/rejected": -0.16729843616485596, "step": 435 }, { "epoch": 1.1496374423203692, "grad_norm": 9.528637886047363, "learning_rate": 4.976042590949423e-06, "log_odds_chosen": 0.23049712181091309, "log_odds_ratio": -0.5938129425048828, "logits/chosen": -1.0522797107696533, "logits/rejected": -0.9975440502166748, "logps/chosen": -1.4030239582061768, "logps/rejected": -1.583653450012207, "loss": 2.2764, "nll_loss": 0.5097287893295288, "rewards/accuracies": 0.875, "rewards/chosen": -0.1403023898601532, "rewards/margins": 0.01806293986737728, "rewards/rejected": -0.15836533904075623, "step": 436 }, { "epoch": 1.1522742254449572, "grad_norm": 10.909761428833008, "learning_rate": 4.968944099378881e-06, "log_odds_chosen": 0.0930374264717102, "log_odds_ratio": -0.6607744693756104, "logits/chosen": -0.9617547392845154, "logits/rejected": -0.9185218811035156, "logps/chosen": -1.7329206466674805, "logps/rejected": -1.8154338598251343, "loss": 3.0056, "nll_loss": 0.6853236556053162, "rewards/accuracies": 0.5, "rewards/chosen": -0.17329205572605133, "rewards/margins": 0.008251333609223366, "rewards/rejected": -0.18154339492321014, "step": 437 }, { "epoch": 1.154911008569545, "grad_norm": 9.53764820098877, "learning_rate": 4.961845607808341e-06, "log_odds_chosen": 0.6326797604560852, "log_odds_ratio": -0.438091516494751, "logits/chosen": -1.0544040203094482, "logits/rejected": -0.9387926459312439, "logps/chosen": -1.3823257684707642, "logps/rejected": -1.8888732194900513, "loss": 2.1051, "nll_loss": 0.48247024416923523, "rewards/accuracies": 1.0, "rewards/chosen": -0.13823257386684418, "rewards/margins": 0.05065474659204483, "rewards/rejected": -0.1888873279094696, "step": 438 }, { "epoch": 1.1575477916941332, "grad_norm": 10.221098899841309, "learning_rate": 4.954747116237799e-06, "log_odds_chosen": 0.25028908252716064, "log_odds_ratio": -0.5832465887069702, "logits/chosen": -1.045633316040039, "logits/rejected": -1.0008597373962402, "logps/chosen": -1.2798808813095093, "logps/rejected": -1.4623150825500488, "loss": 2.5023, "nll_loss": 0.5672391057014465, "rewards/accuracies": 0.75, "rewards/chosen": -0.12798810005187988, "rewards/margins": 0.018243417143821716, "rewards/rejected": -0.1462315171957016, "step": 439 }, { "epoch": 1.1601845748187212, "grad_norm": 9.680135726928711, "learning_rate": 4.947648624667258e-06, "log_odds_chosen": 0.7590002417564392, "log_odds_ratio": -0.40648603439331055, "logits/chosen": -1.078658103942871, "logits/rejected": -0.9909886121749878, "logps/chosen": -1.2903246879577637, "logps/rejected": -1.8791279792785645, "loss": 2.0585, "nll_loss": 0.47397562861442566, "rewards/accuracies": 1.0, "rewards/chosen": -0.12903249263763428, "rewards/margins": 0.05888032540678978, "rewards/rejected": -0.18791279196739197, "step": 440 }, { "epoch": 1.162821357943309, "grad_norm": 9.91891098022461, "learning_rate": 4.940550133096717e-06, "log_odds_chosen": 0.4134746193885803, "log_odds_ratio": -0.5270575284957886, "logits/chosen": -1.071434497833252, "logits/rejected": -0.9813169240951538, "logps/chosen": -1.4695409536361694, "logps/rejected": -1.8117649555206299, "loss": 2.4518, "nll_loss": 0.5602436661720276, "rewards/accuracies": 0.875, "rewards/chosen": -0.14695410430431366, "rewards/margins": 0.03422239422798157, "rewards/rejected": -0.18117651343345642, "step": 441 }, { "epoch": 1.1654581410678972, "grad_norm": 9.274177551269531, "learning_rate": 4.933451641526176e-06, "log_odds_chosen": 0.5152239203453064, "log_odds_ratio": -0.4806838631629944, "logits/chosen": -1.0709270238876343, "logits/rejected": -1.0069520473480225, "logps/chosen": -1.366707444190979, "logps/rejected": -1.7815189361572266, "loss": 2.2684, "nll_loss": 0.5190252661705017, "rewards/accuracies": 1.0, "rewards/chosen": -0.13667075335979462, "rewards/margins": 0.0414811447262764, "rewards/rejected": -0.17815189063549042, "step": 442 }, { "epoch": 1.1680949241924852, "grad_norm": 9.643570899963379, "learning_rate": 4.926353149955634e-06, "log_odds_chosen": 0.3073192834854126, "log_odds_ratio": -0.565639078617096, "logits/chosen": -1.0718648433685303, "logits/rejected": -1.0097239017486572, "logps/chosen": -1.4407001733779907, "logps/rejected": -1.6975566148757935, "loss": 2.3838, "nll_loss": 0.5393775701522827, "rewards/accuracies": 0.625, "rewards/chosen": -0.14407002925872803, "rewards/margins": 0.02568562515079975, "rewards/rejected": -0.16975563764572144, "step": 443 }, { "epoch": 1.170731707317073, "grad_norm": 9.919586181640625, "learning_rate": 4.9192546583850935e-06, "log_odds_chosen": 0.2999556362628937, "log_odds_ratio": -0.5668871998786926, "logits/chosen": -1.0565747022628784, "logits/rejected": -0.9812292456626892, "logps/chosen": -1.3061429262161255, "logps/rejected": -1.5474501848220825, "loss": 2.1788, "nll_loss": 0.4880080819129944, "rewards/accuracies": 0.875, "rewards/chosen": -0.1306142956018448, "rewards/margins": 0.02413073554635048, "rewards/rejected": -0.15474501252174377, "step": 444 }, { "epoch": 1.1733684904416612, "grad_norm": 9.811331748962402, "learning_rate": 4.9121561668145515e-06, "log_odds_chosen": 0.3747047781944275, "log_odds_ratio": -0.5285030603408813, "logits/chosen": -1.0548806190490723, "logits/rejected": -0.9971832036972046, "logps/chosen": -1.227198600769043, "logps/rejected": -1.4902523756027222, "loss": 2.1844, "nll_loss": 0.49325278401374817, "rewards/accuracies": 0.875, "rewards/chosen": -0.12271985411643982, "rewards/margins": 0.02630537748336792, "rewards/rejected": -0.14902523159980774, "step": 445 }, { "epoch": 1.1760052735662492, "grad_norm": 9.10595703125, "learning_rate": 4.9050576752440104e-06, "log_odds_chosen": 0.619392991065979, "log_odds_ratio": -0.454559326171875, "logits/chosen": -1.030524492263794, "logits/rejected": -0.9345372915267944, "logps/chosen": -1.3098779916763306, "logps/rejected": -1.8098673820495605, "loss": 1.9186, "nll_loss": 0.4341898560523987, "rewards/accuracies": 0.875, "rewards/chosen": -0.13098779320716858, "rewards/margins": 0.049998946487903595, "rewards/rejected": -0.18098673224449158, "step": 446 }, { "epoch": 1.178642056690837, "grad_norm": 10.539815902709961, "learning_rate": 4.897959183673469e-06, "log_odds_chosen": 0.23752687871456146, "log_odds_ratio": -0.5838688611984253, "logits/chosen": -1.1413980722427368, "logits/rejected": -1.022702932357788, "logps/chosen": -1.532981038093567, "logps/rejected": -1.7253105640411377, "loss": 3.512, "nll_loss": 0.8196170330047607, "rewards/accuracies": 0.875, "rewards/chosen": -0.15329810976982117, "rewards/margins": 0.019232943654060364, "rewards/rejected": -0.17253105342388153, "step": 447 }, { "epoch": 1.1812788398154253, "grad_norm": 9.152738571166992, "learning_rate": 4.890860692102927e-06, "log_odds_chosen": 0.817240834236145, "log_odds_ratio": -0.4035162329673767, "logits/chosen": -0.9740374088287354, "logits/rejected": -0.8783177733421326, "logps/chosen": -1.3137390613555908, "logps/rejected": -1.9894474744796753, "loss": 1.8385, "nll_loss": 0.4192817807197571, "rewards/accuracies": 1.0, "rewards/chosen": -0.13137391209602356, "rewards/margins": 0.06757082790136337, "rewards/rejected": -0.19894473254680634, "step": 448 }, { "epoch": 1.1839156229400132, "grad_norm": 8.939960479736328, "learning_rate": 4.883762200532387e-06, "log_odds_chosen": 0.3687882721424103, "log_odds_ratio": -0.5365138053894043, "logits/chosen": -1.0808924436569214, "logits/rejected": -0.9705110788345337, "logps/chosen": -1.3692409992218018, "logps/rejected": -1.6519029140472412, "loss": 2.1733, "nll_loss": 0.4896632432937622, "rewards/accuracies": 0.75, "rewards/chosen": -0.1369241178035736, "rewards/margins": 0.0282661821693182, "rewards/rejected": -0.16519027948379517, "step": 449 }, { "epoch": 1.186552406064601, "grad_norm": 10.072639465332031, "learning_rate": 4.876663708961845e-06, "log_odds_chosen": 0.6125161051750183, "log_odds_ratio": -0.4399271011352539, "logits/chosen": -1.109391689300537, "logits/rejected": -1.0129214525222778, "logps/chosen": -1.2192109823226929, "logps/rejected": -1.678152322769165, "loss": 2.1898, "nll_loss": 0.5034641027450562, "rewards/accuracies": 1.0, "rewards/chosen": -0.12192109227180481, "rewards/margins": 0.045894138514995575, "rewards/rejected": -0.16781523823738098, "step": 450 }, { "epoch": 1.1891891891891893, "grad_norm": 10.272133827209473, "learning_rate": 4.869565217391304e-06, "log_odds_chosen": 0.35755234956741333, "log_odds_ratio": -0.534394383430481, "logits/chosen": -1.0693304538726807, "logits/rejected": -0.9832642674446106, "logps/chosen": -1.6391563415527344, "logps/rejected": -1.9330048561096191, "loss": 2.827, "nll_loss": 0.6533028483390808, "rewards/accuracies": 1.0, "rewards/chosen": -0.16391563415527344, "rewards/margins": 0.029384873807430267, "rewards/rejected": -0.1933005005121231, "step": 451 }, { "epoch": 1.1918259723137772, "grad_norm": 10.0197172164917, "learning_rate": 4.862466725820763e-06, "log_odds_chosen": 0.36595743894577026, "log_odds_ratio": -0.5484901070594788, "logits/chosen": -1.0909594297409058, "logits/rejected": -1.0414979457855225, "logps/chosen": -1.3381670713424683, "logps/rejected": -1.585001826286316, "loss": 2.6636, "nll_loss": 0.6110493540763855, "rewards/accuracies": 0.875, "rewards/chosen": -0.1338167041540146, "rewards/margins": 0.024683479219675064, "rewards/rejected": -0.15850019454956055, "step": 452 }, { "epoch": 1.194462755438365, "grad_norm": 9.387800216674805, "learning_rate": 4.855368234250222e-06, "log_odds_chosen": 0.3087214231491089, "log_odds_ratio": -0.5611314177513123, "logits/chosen": -1.0786112546920776, "logits/rejected": -0.9813193082809448, "logps/chosen": -1.4780585765838623, "logps/rejected": -1.7218488454818726, "loss": 2.4138, "nll_loss": 0.5473390817642212, "rewards/accuracies": 1.0, "rewards/chosen": -0.14780586957931519, "rewards/margins": 0.02437901869416237, "rewards/rejected": -0.17218489944934845, "step": 453 }, { "epoch": 1.1970995385629533, "grad_norm": 10.184649467468262, "learning_rate": 4.84826974267968e-06, "log_odds_chosen": 0.50560462474823, "log_odds_ratio": -0.4792592525482178, "logits/chosen": -1.097749948501587, "logits/rejected": -0.9480170011520386, "logps/chosen": -1.3219808340072632, "logps/rejected": -1.7089626789093018, "loss": 2.4091, "nll_loss": 0.5543407797813416, "rewards/accuracies": 1.0, "rewards/chosen": -0.13219808042049408, "rewards/margins": 0.03869818150997162, "rewards/rejected": -0.1708962619304657, "step": 454 }, { "epoch": 1.1997363216875412, "grad_norm": 9.074294090270996, "learning_rate": 4.841171251109139e-06, "log_odds_chosen": 0.7959095239639282, "log_odds_ratio": -0.40639543533325195, "logits/chosen": -1.0378267765045166, "logits/rejected": -0.9526659250259399, "logps/chosen": -1.2382162809371948, "logps/rejected": -1.829305648803711, "loss": 2.0053, "nll_loss": 0.46069228649139404, "rewards/accuracies": 1.0, "rewards/chosen": -0.12382163852453232, "rewards/margins": 0.059108927845954895, "rewards/rejected": -0.18293055891990662, "step": 455 }, { "epoch": 1.2023731048121291, "grad_norm": 9.499855041503906, "learning_rate": 4.834072759538598e-06, "log_odds_chosen": 0.29473090171813965, "log_odds_ratio": -0.573003888130188, "logits/chosen": -1.0707342624664307, "logits/rejected": -0.9860851168632507, "logps/chosen": -1.314915418624878, "logps/rejected": -1.5479837656021118, "loss": 2.3088, "nll_loss": 0.5199118256568909, "rewards/accuracies": 0.875, "rewards/chosen": -0.1314915418624878, "rewards/margins": 0.023306837305426598, "rewards/rejected": -0.15479837357997894, "step": 456 }, { "epoch": 1.2050098879367173, "grad_norm": 9.589157104492188, "learning_rate": 4.826974267968057e-06, "log_odds_chosen": 0.2921063303947449, "log_odds_ratio": -0.5728156566619873, "logits/chosen": -1.0033105611801147, "logits/rejected": -0.9454600811004639, "logps/chosen": -1.4705820083618164, "logps/rejected": -1.7040565013885498, "loss": 2.1781, "nll_loss": 0.48725056648254395, "rewards/accuracies": 0.875, "rewards/chosen": -0.14705820381641388, "rewards/margins": 0.023347454145550728, "rewards/rejected": -0.17040565609931946, "step": 457 }, { "epoch": 1.2076466710613052, "grad_norm": 9.195829391479492, "learning_rate": 4.8198757763975155e-06, "log_odds_chosen": 0.5113301277160645, "log_odds_ratio": -0.4782109558582306, "logits/chosen": -1.0404274463653564, "logits/rejected": -0.9821817874908447, "logps/chosen": -1.3258305788040161, "logps/rejected": -1.7209516763687134, "loss": 2.0337, "nll_loss": 0.4606133997440338, "rewards/accuracies": 1.0, "rewards/chosen": -0.13258305191993713, "rewards/margins": 0.03951210528612137, "rewards/rejected": -0.1720951795578003, "step": 458 }, { "epoch": 1.2102834541858931, "grad_norm": 9.847295761108398, "learning_rate": 4.812777284826974e-06, "log_odds_chosen": 0.45984476804733276, "log_odds_ratio": -0.49745601415634155, "logits/chosen": -1.033888578414917, "logits/rejected": -0.9601117372512817, "logps/chosen": -1.411940574645996, "logps/rejected": -1.7642412185668945, "loss": 2.2796, "nll_loss": 0.5201572775840759, "rewards/accuracies": 1.0, "rewards/chosen": -0.14119404554367065, "rewards/margins": 0.03523007407784462, "rewards/rejected": -0.17642413079738617, "step": 459 }, { "epoch": 1.2129202373104813, "grad_norm": 9.481552124023438, "learning_rate": 4.805678793256433e-06, "log_odds_chosen": 0.2638810873031616, "log_odds_ratio": -0.5864376425743103, "logits/chosen": -1.0747355222702026, "logits/rejected": -1.0247514247894287, "logps/chosen": -1.3305190801620483, "logps/rejected": -1.5326287746429443, "loss": 2.4889, "nll_loss": 0.5635707974433899, "rewards/accuracies": 0.875, "rewards/chosen": -0.13305191695690155, "rewards/margins": 0.02021096646785736, "rewards/rejected": -0.1532628834247589, "step": 460 }, { "epoch": 1.2155570204350692, "grad_norm": 10.324326515197754, "learning_rate": 4.798580301685891e-06, "log_odds_chosen": 0.4667387008666992, "log_odds_ratio": -0.49474701285362244, "logits/chosen": -1.1680094003677368, "logits/rejected": -1.090071439743042, "logps/chosen": -1.333450198173523, "logps/rejected": -1.7026855945587158, "loss": 2.6032, "nll_loss": 0.6013253331184387, "rewards/accuracies": 1.0, "rewards/chosen": -0.13334502279758453, "rewards/margins": 0.03692355006933212, "rewards/rejected": -0.17026856541633606, "step": 461 }, { "epoch": 1.2181938035596573, "grad_norm": 9.674001693725586, "learning_rate": 4.79148181011535e-06, "log_odds_chosen": 0.6859418749809265, "log_odds_ratio": -0.43002980947494507, "logits/chosen": -1.090169906616211, "logits/rejected": -0.9816117286682129, "logps/chosen": -1.3842506408691406, "logps/rejected": -1.94362211227417, "loss": 2.3453, "nll_loss": 0.5433187484741211, "rewards/accuracies": 1.0, "rewards/chosen": -0.1384250670671463, "rewards/margins": 0.05593716353178024, "rewards/rejected": -0.19436222314834595, "step": 462 }, { "epoch": 1.2208305866842453, "grad_norm": 10.021893501281738, "learning_rate": 4.784383318544809e-06, "log_odds_chosen": 0.34028083086013794, "log_odds_ratio": -0.5499065518379211, "logits/chosen": -1.116060733795166, "logits/rejected": -1.0241994857788086, "logps/chosen": -1.4120571613311768, "logps/rejected": -1.6751539707183838, "loss": 2.6105, "nll_loss": 0.5976427793502808, "rewards/accuracies": 0.875, "rewards/chosen": -0.14120571315288544, "rewards/margins": 0.02630968950688839, "rewards/rejected": -0.16751541197299957, "step": 463 }, { "epoch": 1.2234673698088332, "grad_norm": 10.2836332321167, "learning_rate": 4.777284826974267e-06, "log_odds_chosen": 0.5319832563400269, "log_odds_ratio": -0.4687221348285675, "logits/chosen": -1.099975824356079, "logits/rejected": -0.9656794667243958, "logps/chosen": -1.3129050731658936, "logps/rejected": -1.7177257537841797, "loss": 2.3185, "nll_loss": 0.5327426195144653, "rewards/accuracies": 1.0, "rewards/chosen": -0.1312904953956604, "rewards/margins": 0.040482066571712494, "rewards/rejected": -0.1717725694179535, "step": 464 }, { "epoch": 1.2261041529334213, "grad_norm": 8.89089584350586, "learning_rate": 4.770186335403727e-06, "log_odds_chosen": 0.5987759828567505, "log_odds_ratio": -0.4692104756832123, "logits/chosen": -1.0071309804916382, "logits/rejected": -0.9458831548690796, "logps/chosen": -1.357689619064331, "logps/rejected": -1.819558024406433, "loss": 1.733, "nll_loss": 0.38633984327316284, "rewards/accuracies": 0.875, "rewards/chosen": -0.13576896488666534, "rewards/margins": 0.04618682339787483, "rewards/rejected": -0.18195581436157227, "step": 465 }, { "epoch": 1.2287409360580093, "grad_norm": 9.948925018310547, "learning_rate": 4.763087843833185e-06, "log_odds_chosen": 0.15255561470985413, "log_odds_ratio": -0.6316505074501038, "logits/chosen": -1.0351141691207886, "logits/rejected": -1.004443645477295, "logps/chosen": -1.345405101776123, "logps/rejected": -1.4590325355529785, "loss": 2.131, "nll_loss": 0.46959295868873596, "rewards/accuracies": 0.75, "rewards/chosen": -0.13454051315784454, "rewards/margins": 0.01136273704469204, "rewards/rejected": -0.14590325951576233, "step": 466 }, { "epoch": 1.2313777191825972, "grad_norm": 10.681402206420898, "learning_rate": 4.755989352262644e-06, "log_odds_chosen": 0.3940380811691284, "log_odds_ratio": -0.5332112312316895, "logits/chosen": -1.1033565998077393, "logits/rejected": -1.0111594200134277, "logps/chosen": -1.4389121532440186, "logps/rejected": -1.7569591999053955, "loss": 2.3208, "nll_loss": 0.5268727540969849, "rewards/accuracies": 0.875, "rewards/chosen": -0.14389123022556305, "rewards/margins": 0.031804703176021576, "rewards/rejected": -0.17569592595100403, "step": 467 }, { "epoch": 1.2340145023071853, "grad_norm": 9.829061508178711, "learning_rate": 4.748890860692103e-06, "log_odds_chosen": 0.5439039468765259, "log_odds_ratio": -0.46788734197616577, "logits/chosen": -1.027842402458191, "logits/rejected": -0.9599955677986145, "logps/chosen": -1.3897879123687744, "logps/rejected": -1.8280632495880127, "loss": 2.157, "nll_loss": 0.4924602508544922, "rewards/accuracies": 0.875, "rewards/chosen": -0.13897879421710968, "rewards/margins": 0.043827541172504425, "rewards/rejected": -0.1828063428401947, "step": 468 }, { "epoch": 1.2366512854317733, "grad_norm": 10.2400541305542, "learning_rate": 4.741792369121562e-06, "log_odds_chosen": 0.26251474022865295, "log_odds_ratio": -0.5879825353622437, "logits/chosen": -1.071041464805603, "logits/rejected": -0.9921205043792725, "logps/chosen": -1.557490587234497, "logps/rejected": -1.7682377099990845, "loss": 2.774, "nll_loss": 0.6346943378448486, "rewards/accuracies": 0.625, "rewards/chosen": -0.15574905276298523, "rewards/margins": 0.021074706688523293, "rewards/rejected": -0.17682376503944397, "step": 469 }, { "epoch": 1.2392880685563612, "grad_norm": 9.739836692810059, "learning_rate": 4.73469387755102e-06, "log_odds_chosen": 0.49304506182670593, "log_odds_ratio": -0.4902867376804352, "logits/chosen": -1.0478068590164185, "logits/rejected": -0.988756537437439, "logps/chosen": -1.3048160076141357, "logps/rejected": -1.6764576435089111, "loss": 2.2099, "nll_loss": 0.5034420490264893, "rewards/accuracies": 1.0, "rewards/chosen": -0.13048160076141357, "rewards/margins": 0.03716415539383888, "rewards/rejected": -0.16764578223228455, "step": 470 }, { "epoch": 1.2419248516809493, "grad_norm": 10.401447296142578, "learning_rate": 4.7275953859804795e-06, "log_odds_chosen": 0.2648758292198181, "log_odds_ratio": -0.5851804614067078, "logits/chosen": -1.1387560367584229, "logits/rejected": -1.0944470167160034, "logps/chosen": -1.4404828548431396, "logps/rejected": -1.6441142559051514, "loss": 2.4325, "nll_loss": 0.5495949983596802, "rewards/accuracies": 0.875, "rewards/chosen": -0.144048273563385, "rewards/margins": 0.02036314457654953, "rewards/rejected": -0.16441142559051514, "step": 471 }, { "epoch": 1.2445616348055373, "grad_norm": 10.700909614562988, "learning_rate": 4.720496894409938e-06, "log_odds_chosen": 0.24711650609970093, "log_odds_ratio": -0.6107478737831116, "logits/chosen": -1.0727221965789795, "logits/rejected": -0.9578004479408264, "logps/chosen": -1.564539909362793, "logps/rejected": -1.7778160572052002, "loss": 2.5579, "nll_loss": 0.5783965587615967, "rewards/accuracies": 0.875, "rewards/chosen": -0.15645399689674377, "rewards/margins": 0.021327603608369827, "rewards/rejected": -0.1777816116809845, "step": 472 }, { "epoch": 1.2471984179301252, "grad_norm": 10.717304229736328, "learning_rate": 4.713398402839396e-06, "log_odds_chosen": 0.10234677046537399, "log_odds_ratio": -0.6500293016433716, "logits/chosen": -1.0790221691131592, "logits/rejected": -0.99981689453125, "logps/chosen": -1.481553077697754, "logps/rejected": -1.548191785812378, "loss": 2.5536, "nll_loss": 0.5734033584594727, "rewards/accuracies": 0.625, "rewards/chosen": -0.1481553167104721, "rewards/margins": 0.006663870066404343, "rewards/rejected": -0.15481917560100555, "step": 473 }, { "epoch": 1.2498352010547134, "grad_norm": 10.185778617858887, "learning_rate": 4.706299911268855e-06, "log_odds_chosen": 0.4353746175765991, "log_odds_ratio": -0.5056857466697693, "logits/chosen": -1.0191655158996582, "logits/rejected": -0.9557269811630249, "logps/chosen": -1.4184210300445557, "logps/rejected": -1.7543326616287231, "loss": 2.1173, "nll_loss": 0.4787678122520447, "rewards/accuracies": 1.0, "rewards/chosen": -0.14184211194515228, "rewards/margins": 0.03359116241335869, "rewards/rejected": -0.17543327808380127, "step": 474 }, { "epoch": 1.2524719841793013, "grad_norm": 8.66719913482666, "learning_rate": 4.6992014196983134e-06, "log_odds_chosen": 0.351694256067276, "log_odds_ratio": -0.5478801727294922, "logits/chosen": -1.0281472206115723, "logits/rejected": -0.9631309509277344, "logps/chosen": -1.3119217157363892, "logps/rejected": -1.575613260269165, "loss": 1.9327, "nll_loss": 0.4283771514892578, "rewards/accuracies": 0.875, "rewards/chosen": -0.1311921775341034, "rewards/margins": 0.026369158178567886, "rewards/rejected": -0.1575613170862198, "step": 475 }, { "epoch": 1.2551087673038892, "grad_norm": 9.768600463867188, "learning_rate": 4.692102928127773e-06, "log_odds_chosen": 0.6341227889060974, "log_odds_ratio": -0.4367087483406067, "logits/chosen": -1.022531509399414, "logits/rejected": -0.9440472722053528, "logps/chosen": -1.2810771465301514, "logps/rejected": -1.7700272798538208, "loss": 1.8679, "nll_loss": 0.42331641912460327, "rewards/accuracies": 1.0, "rewards/chosen": -0.1281077116727829, "rewards/margins": 0.04889502748847008, "rewards/rejected": -0.17700272798538208, "step": 476 }, { "epoch": 1.2577455504284774, "grad_norm": 10.173186302185059, "learning_rate": 4.685004436557231e-06, "log_odds_chosen": 0.44422340393066406, "log_odds_ratio": -0.5055138468742371, "logits/chosen": -1.0871319770812988, "logits/rejected": -0.9896256327629089, "logps/chosen": -1.4739582538604736, "logps/rejected": -1.84141206741333, "loss": 2.4561, "nll_loss": 0.5634859800338745, "rewards/accuracies": 1.0, "rewards/chosen": -0.14739583432674408, "rewards/margins": 0.036745380610227585, "rewards/rejected": -0.18414120376110077, "step": 477 }, { "epoch": 1.2603823335530653, "grad_norm": 9.409655570983887, "learning_rate": 4.67790594498669e-06, "log_odds_chosen": 0.8021876811981201, "log_odds_ratio": -0.430938184261322, "logits/chosen": -1.0572388172149658, "logits/rejected": -0.9765225052833557, "logps/chosen": -1.298008918762207, "logps/rejected": -1.934680700302124, "loss": 2.0465, "nll_loss": 0.46853333711624146, "rewards/accuracies": 0.875, "rewards/chosen": -0.1298009157180786, "rewards/margins": 0.0636671632528305, "rewards/rejected": -0.19346806406974792, "step": 478 }, { "epoch": 1.2630191166776532, "grad_norm": 9.278806686401367, "learning_rate": 4.670807453416149e-06, "log_odds_chosen": 0.5163518786430359, "log_odds_ratio": -0.48643603920936584, "logits/chosen": -1.025752067565918, "logits/rejected": -0.9585089087486267, "logps/chosen": -1.2525575160980225, "logps/rejected": -1.6311428546905518, "loss": 1.8546, "nll_loss": 0.41501230001449585, "rewards/accuracies": 0.875, "rewards/chosen": -0.12525574862957, "rewards/margins": 0.037858542054891586, "rewards/rejected": -0.1631142795085907, "step": 479 }, { "epoch": 1.2656558998022414, "grad_norm": 10.19020938873291, "learning_rate": 4.663708961845608e-06, "log_odds_chosen": 0.4915609061717987, "log_odds_ratio": -0.49741148948669434, "logits/chosen": -1.096588373184204, "logits/rejected": -1.0395936965942383, "logps/chosen": -1.332758903503418, "logps/rejected": -1.6852349042892456, "loss": 2.4374, "nll_loss": 0.5596209168434143, "rewards/accuracies": 0.875, "rewards/chosen": -0.13327589631080627, "rewards/margins": 0.035247594118118286, "rewards/rejected": -0.16852349042892456, "step": 480 }, { "epoch": 1.2682926829268293, "grad_norm": 10.530284881591797, "learning_rate": 4.656610470275066e-06, "log_odds_chosen": 0.3427813649177551, "log_odds_ratio": -0.543682873249054, "logits/chosen": -1.1346460580825806, "logits/rejected": -1.0081522464752197, "logps/chosen": -1.4239239692687988, "logps/rejected": -1.6867188215255737, "loss": 2.7272, "nll_loss": 0.6274286508560181, "rewards/accuracies": 0.875, "rewards/chosen": -0.14239239692687988, "rewards/margins": 0.026279501616954803, "rewards/rejected": -0.1686718761920929, "step": 481 }, { "epoch": 1.2709294660514172, "grad_norm": 9.774396896362305, "learning_rate": 4.649511978704525e-06, "log_odds_chosen": 0.5521863102912903, "log_odds_ratio": -0.4603966176509857, "logits/chosen": -1.0786266326904297, "logits/rejected": -0.986792802810669, "logps/chosen": -1.4126559495925903, "logps/rejected": -1.8508191108703613, "loss": 2.3706, "nll_loss": 0.5466091632843018, "rewards/accuracies": 1.0, "rewards/chosen": -0.1412656009197235, "rewards/margins": 0.043816305696964264, "rewards/rejected": -0.18508189916610718, "step": 482 }, { "epoch": 1.2735662491760054, "grad_norm": 9.518534660339355, "learning_rate": 4.642413487133984e-06, "log_odds_chosen": 0.19123250246047974, "log_odds_ratio": -0.6104004383087158, "logits/chosen": -1.1052263975143433, "logits/rejected": -1.0724090337753296, "logps/chosen": -1.2705307006835938, "logps/rejected": -1.4086744785308838, "loss": 2.1503, "nll_loss": 0.4765278100967407, "rewards/accuracies": 0.875, "rewards/chosen": -0.12705306708812714, "rewards/margins": 0.013814376667141914, "rewards/rejected": -0.1408674567937851, "step": 483 }, { "epoch": 1.2762030323005933, "grad_norm": 10.300300598144531, "learning_rate": 4.635314995563443e-06, "log_odds_chosen": 1.0402419567108154, "log_odds_ratio": -0.3782433569431305, "logits/chosen": -1.122322916984558, "logits/rejected": -0.9631179571151733, "logps/chosen": -1.3168840408325195, "logps/rejected": -2.1896350383758545, "loss": 2.2423, "nll_loss": 0.5227570533752441, "rewards/accuracies": 1.0, "rewards/chosen": -0.1316884160041809, "rewards/margins": 0.08727509528398514, "rewards/rejected": -0.21896350383758545, "step": 484 }, { "epoch": 1.2788398154251812, "grad_norm": 9.907837867736816, "learning_rate": 4.628216503992902e-06, "log_odds_chosen": 0.49135133624076843, "log_odds_ratio": -0.5001718401908875, "logits/chosen": -1.0758206844329834, "logits/rejected": -1.0195170640945435, "logps/chosen": -1.3113082647323608, "logps/rejected": -1.6820894479751587, "loss": 2.7119, "nll_loss": 0.6279576420783997, "rewards/accuracies": 0.875, "rewards/chosen": -0.13113084435462952, "rewards/margins": 0.03707811236381531, "rewards/rejected": -0.16820895671844482, "step": 485 }, { "epoch": 1.2814765985497694, "grad_norm": 10.635579109191895, "learning_rate": 4.62111801242236e-06, "log_odds_chosen": 0.26804792881011963, "log_odds_ratio": -0.579971432685852, "logits/chosen": -1.1183302402496338, "logits/rejected": -1.033354640007019, "logps/chosen": -1.4729652404785156, "logps/rejected": -1.677699327468872, "loss": 2.7361, "nll_loss": 0.6260372996330261, "rewards/accuracies": 0.625, "rewards/chosen": -0.14729654788970947, "rewards/margins": 0.020473407581448555, "rewards/rejected": -0.16776993870735168, "step": 486 }, { "epoch": 1.2841133816743573, "grad_norm": 10.664183616638184, "learning_rate": 4.614019520851819e-06, "log_odds_chosen": 0.4930553436279297, "log_odds_ratio": -0.4874056875705719, "logits/chosen": -1.1609282493591309, "logits/rejected": -0.9723719358444214, "logps/chosen": -1.4261642694473267, "logps/rejected": -1.8289806842803955, "loss": 2.7762, "nll_loss": 0.6453151702880859, "rewards/accuracies": 1.0, "rewards/chosen": -0.14261643588542938, "rewards/margins": 0.040281638503074646, "rewards/rejected": -0.18289807438850403, "step": 487 }, { "epoch": 1.2867501647989452, "grad_norm": 9.715775489807129, "learning_rate": 4.6069210292812774e-06, "log_odds_chosen": 0.5041050910949707, "log_odds_ratio": -0.4901432991027832, "logits/chosen": -1.1087746620178223, "logits/rejected": -1.0324528217315674, "logps/chosen": -1.2873916625976562, "logps/rejected": -1.6561278104782104, "loss": 2.1677, "nll_loss": 0.4929226040840149, "rewards/accuracies": 0.875, "rewards/chosen": -0.12873917818069458, "rewards/margins": 0.03687359765172005, "rewards/rejected": -0.16561277210712433, "step": 488 }, { "epoch": 1.2893869479235334, "grad_norm": 9.831827163696289, "learning_rate": 4.599822537710736e-06, "log_odds_chosen": 0.4582131505012512, "log_odds_ratio": -0.49508172273635864, "logits/chosen": -1.1099534034729004, "logits/rejected": -1.0009325742721558, "logps/chosen": -1.3387107849121094, "logps/rejected": -1.7003153562545776, "loss": 2.3767, "nll_loss": 0.544657826423645, "rewards/accuracies": 1.0, "rewards/chosen": -0.13387107849121094, "rewards/margins": 0.03616045415401459, "rewards/rejected": -0.17003153264522552, "step": 489 }, { "epoch": 1.2920237310481213, "grad_norm": 10.236985206604004, "learning_rate": 4.592724046140195e-06, "log_odds_chosen": 0.8142597675323486, "log_odds_ratio": -0.46944791078567505, "logits/chosen": -1.032392978668213, "logits/rejected": -0.9740979075431824, "logps/chosen": -1.5206587314605713, "logps/rejected": -2.215409755706787, "loss": 2.3121, "nll_loss": 0.531091034412384, "rewards/accuracies": 0.875, "rewards/chosen": -0.15206588804721832, "rewards/margins": 0.06947509944438934, "rewards/rejected": -0.22154098749160767, "step": 490 }, { "epoch": 1.2946605141727092, "grad_norm": 10.013334274291992, "learning_rate": 4.585625554569653e-06, "log_odds_chosen": 0.37788182497024536, "log_odds_ratio": -0.5361526012420654, "logits/chosen": -1.0860071182250977, "logits/rejected": -1.032589077949524, "logps/chosen": -1.5222413539886475, "logps/rejected": -1.8303142786026, "loss": 2.8472, "nll_loss": 0.6581736207008362, "rewards/accuracies": 0.75, "rewards/chosen": -0.1522241234779358, "rewards/margins": 0.030807292088866234, "rewards/rejected": -0.18303140997886658, "step": 491 }, { "epoch": 1.2972972972972974, "grad_norm": 9.102086067199707, "learning_rate": 4.578527062999112e-06, "log_odds_chosen": 0.35535067319869995, "log_odds_ratio": -0.5479142665863037, "logits/chosen": -1.008329153060913, "logits/rejected": -0.9556934833526611, "logps/chosen": -1.277017593383789, "logps/rejected": -1.5601518154144287, "loss": 1.8573, "nll_loss": 0.4095434248447418, "rewards/accuracies": 0.875, "rewards/chosen": -0.1277017742395401, "rewards/margins": 0.028313428163528442, "rewards/rejected": -0.15601518750190735, "step": 492 }, { "epoch": 1.2999340804218853, "grad_norm": 9.600390434265137, "learning_rate": 4.571428571428571e-06, "log_odds_chosen": 0.5586351752281189, "log_odds_ratio": -0.4633020758628845, "logits/chosen": -1.1387877464294434, "logits/rejected": -1.0374525785446167, "logps/chosen": -1.492626428604126, "logps/rejected": -1.9490209817886353, "loss": 2.5633, "nll_loss": 0.5945045351982117, "rewards/accuracies": 1.0, "rewards/chosen": -0.14926263689994812, "rewards/margins": 0.04563945531845093, "rewards/rejected": -0.19490210711956024, "step": 493 }, { "epoch": 1.3025708635464732, "grad_norm": 9.457123756408691, "learning_rate": 4.56433007985803e-06, "log_odds_chosen": 0.7423944473266602, "log_odds_ratio": -0.39905303716659546, "logits/chosen": -1.1221680641174316, "logits/rejected": -0.9649347066879272, "logps/chosen": -1.3206372261047363, "logps/rejected": -1.889852523803711, "loss": 2.355, "nll_loss": 0.5488362312316895, "rewards/accuracies": 1.0, "rewards/chosen": -0.13206373155117035, "rewards/margins": 0.05692153424024582, "rewards/rejected": -0.18898525834083557, "step": 494 }, { "epoch": 1.3052076466710614, "grad_norm": 10.164525985717773, "learning_rate": 4.557231588287489e-06, "log_odds_chosen": 0.4164513349533081, "log_odds_ratio": -0.5168020725250244, "logits/chosen": -1.0461890697479248, "logits/rejected": -0.9742856025695801, "logps/chosen": -1.4978477954864502, "logps/rejected": -1.8457560539245605, "loss": 2.4777, "nll_loss": 0.5677485466003418, "rewards/accuracies": 0.875, "rewards/chosen": -0.14978477358818054, "rewards/margins": 0.03479084372520447, "rewards/rejected": -0.184575617313385, "step": 495 }, { "epoch": 1.3078444297956493, "grad_norm": 9.663432121276855, "learning_rate": 4.550133096716948e-06, "log_odds_chosen": 0.5467130541801453, "log_odds_ratio": -0.4778140187263489, "logits/chosen": -1.1307505369186401, "logits/rejected": -1.0496280193328857, "logps/chosen": -1.345214605331421, "logps/rejected": -1.7747489213943481, "loss": 2.3803, "nll_loss": 0.5472964644432068, "rewards/accuracies": 0.75, "rewards/chosen": -0.1345214694738388, "rewards/margins": 0.04295342043042183, "rewards/rejected": -0.17747488617897034, "step": 496 }, { "epoch": 1.3104812129202372, "grad_norm": 9.685505867004395, "learning_rate": 4.543034605146406e-06, "log_odds_chosen": 0.4372747540473938, "log_odds_ratio": -0.5114445686340332, "logits/chosen": -1.1019073724746704, "logits/rejected": -1.0229694843292236, "logps/chosen": -1.3307362794876099, "logps/rejected": -1.665488600730896, "loss": 2.4608, "nll_loss": 0.564062774181366, "rewards/accuracies": 0.875, "rewards/chosen": -0.133073627948761, "rewards/margins": 0.033475231379270554, "rewards/rejected": -0.16654884815216064, "step": 497 }, { "epoch": 1.3131179960448254, "grad_norm": 9.91350269317627, "learning_rate": 4.535936113575866e-06, "log_odds_chosen": 0.4812414348125458, "log_odds_ratio": -0.4848937392234802, "logits/chosen": -1.0916714668273926, "logits/rejected": -1.0008459091186523, "logps/chosen": -1.315626859664917, "logps/rejected": -1.6926066875457764, "loss": 2.1481, "nll_loss": 0.48853224515914917, "rewards/accuracies": 1.0, "rewards/chosen": -0.13156269490718842, "rewards/margins": 0.037697985768318176, "rewards/rejected": -0.1692606806755066, "step": 498 }, { "epoch": 1.3157547791694133, "grad_norm": 10.163434982299805, "learning_rate": 4.528837622005324e-06, "log_odds_chosen": 0.37900176644325256, "log_odds_ratio": -0.5468506217002869, "logits/chosen": -1.0818631649017334, "logits/rejected": -0.9458174705505371, "logps/chosen": -1.6487318277359009, "logps/rejected": -1.9673023223876953, "loss": 2.5613, "nll_loss": 0.5856290459632874, "rewards/accuracies": 0.75, "rewards/chosen": -0.1648731827735901, "rewards/margins": 0.03185705095529556, "rewards/rejected": -0.19673022627830505, "step": 499 }, { "epoch": 1.3183915622940012, "grad_norm": 9.899470329284668, "learning_rate": 4.521739130434782e-06, "log_odds_chosen": 0.5170084238052368, "log_odds_ratio": -0.4788874387741089, "logits/chosen": -1.0836416482925415, "logits/rejected": -0.9832676649093628, "logps/chosen": -1.4521634578704834, "logps/rejected": -1.8645068407058716, "loss": 2.3863, "nll_loss": 0.5486769676208496, "rewards/accuracies": 0.875, "rewards/chosen": -0.14521636068820953, "rewards/margins": 0.04123431444168091, "rewards/rejected": -0.18645067512989044, "step": 500 }, { "epoch": 1.3210283454185894, "grad_norm": 9.3609037399292, "learning_rate": 4.5146406388642415e-06, "log_odds_chosen": 0.529761791229248, "log_odds_ratio": -0.4753470718860626, "logits/chosen": -1.024951696395874, "logits/rejected": -0.975610613822937, "logps/chosen": -1.1999605894088745, "logps/rejected": -1.6050573587417603, "loss": 1.7346, "nll_loss": 0.3861042857170105, "rewards/accuracies": 1.0, "rewards/chosen": -0.11999605596065521, "rewards/margins": 0.040509678423404694, "rewards/rejected": -0.1605057418346405, "step": 501 }, { "epoch": 1.3236651285431773, "grad_norm": 10.891241073608398, "learning_rate": 4.5075421472936995e-06, "log_odds_chosen": 0.1583474576473236, "log_odds_ratio": -0.6193140149116516, "logits/chosen": -1.0535261631011963, "logits/rejected": -1.0259627103805542, "logps/chosen": -1.618718147277832, "logps/rejected": -1.746124029159546, "loss": 2.7019, "nll_loss": 0.613548755645752, "rewards/accuracies": 0.875, "rewards/chosen": -0.16187182068824768, "rewards/margins": 0.012740583159029484, "rewards/rejected": -0.1746124029159546, "step": 502 }, { "epoch": 1.3263019116677652, "grad_norm": 10.03843879699707, "learning_rate": 4.500443655723159e-06, "log_odds_chosen": 0.458172470331192, "log_odds_ratio": -0.5375922918319702, "logits/chosen": -1.1176626682281494, "logits/rejected": -1.050915241241455, "logps/chosen": -1.3184903860092163, "logps/rejected": -1.667731523513794, "loss": 2.4683, "nll_loss": 0.5633128881454468, "rewards/accuracies": 0.875, "rewards/chosen": -0.13184905052185059, "rewards/margins": 0.034924112260341644, "rewards/rejected": -0.16677314043045044, "step": 503 }, { "epoch": 1.3289386947923534, "grad_norm": 9.57889461517334, "learning_rate": 4.493345164152617e-06, "log_odds_chosen": 0.4582204818725586, "log_odds_ratio": -0.5044894218444824, "logits/chosen": -1.0578508377075195, "logits/rejected": -0.9902914762496948, "logps/chosen": -1.4905551671981812, "logps/rejected": -1.8522902727127075, "loss": 2.4895, "nll_loss": 0.5719265937805176, "rewards/accuracies": 0.875, "rewards/chosen": -0.14905551075935364, "rewards/margins": 0.03617352247238159, "rewards/rejected": -0.18522903323173523, "step": 504 }, { "epoch": 1.3315754779169413, "grad_norm": 9.206018447875977, "learning_rate": 4.486246672582076e-06, "log_odds_chosen": 0.5450261235237122, "log_odds_ratio": -0.47390875220298767, "logits/chosen": -1.1009422540664673, "logits/rejected": -1.0099388360977173, "logps/chosen": -1.2043448686599731, "logps/rejected": -1.6163784265518188, "loss": 2.1674, "nll_loss": 0.49445146322250366, "rewards/accuracies": 1.0, "rewards/chosen": -0.1204344779253006, "rewards/margins": 0.04120335727930069, "rewards/rejected": -0.16163784265518188, "step": 505 }, { "epoch": 1.3342122610415292, "grad_norm": 9.322002410888672, "learning_rate": 4.479148181011535e-06, "log_odds_chosen": 0.8400002121925354, "log_odds_ratio": -0.40598559379577637, "logits/chosen": -1.0018384456634521, "logits/rejected": -0.8976902961730957, "logps/chosen": -1.2162275314331055, "logps/rejected": -1.9030863046646118, "loss": 1.7841, "nll_loss": 0.405415415763855, "rewards/accuracies": 1.0, "rewards/chosen": -0.12162275612354279, "rewards/margins": 0.0686858743429184, "rewards/rejected": -0.1903086155653, "step": 506 }, { "epoch": 1.3368490441661174, "grad_norm": 10.626153945922852, "learning_rate": 4.472049689440993e-06, "log_odds_chosen": 0.2626584768295288, "log_odds_ratio": -0.5733634233474731, "logits/chosen": -1.1341722011566162, "logits/rejected": -1.0358843803405762, "logps/chosen": -1.5009350776672363, "logps/rejected": -1.711402416229248, "loss": 2.6765, "nll_loss": 0.6117795705795288, "rewards/accuracies": 1.0, "rewards/chosen": -0.15009349584579468, "rewards/margins": 0.021046744659543037, "rewards/rejected": -0.17114025354385376, "step": 507 }, { "epoch": 1.3394858272907053, "grad_norm": 9.762642860412598, "learning_rate": 4.464951197870452e-06, "log_odds_chosen": 0.34729743003845215, "log_odds_ratio": -0.5531666278839111, "logits/chosen": -1.0909682512283325, "logits/rejected": -0.999322772026062, "logps/chosen": -1.3530230522155762, "logps/rejected": -1.6313226222991943, "loss": 2.3527, "nll_loss": 0.5328519344329834, "rewards/accuracies": 0.625, "rewards/chosen": -0.13530230522155762, "rewards/margins": 0.027829963713884354, "rewards/rejected": -0.16313226521015167, "step": 508 }, { "epoch": 1.3421226104152932, "grad_norm": 9.921836853027344, "learning_rate": 4.457852706299911e-06, "log_odds_chosen": 0.3846244215965271, "log_odds_ratio": -0.5229079127311707, "logits/chosen": -1.1582725048065186, "logits/rejected": -1.0689759254455566, "logps/chosen": -1.4322659969329834, "logps/rejected": -1.7391772270202637, "loss": 2.7252, "nll_loss": 0.6290191411972046, "rewards/accuracies": 1.0, "rewards/chosen": -0.14322659373283386, "rewards/margins": 0.0306911189109087, "rewards/rejected": -0.1739177256822586, "step": 509 }, { "epoch": 1.3447593935398814, "grad_norm": 9.485255241394043, "learning_rate": 4.45075421472937e-06, "log_odds_chosen": 0.5635192394256592, "log_odds_ratio": -0.4641314148902893, "logits/chosen": -1.1391786336898804, "logits/rejected": -1.0365322828292847, "logps/chosen": -1.392361044883728, "logps/rejected": -1.841564416885376, "loss": 2.4514, "nll_loss": 0.5664429664611816, "rewards/accuracies": 1.0, "rewards/chosen": -0.13923612236976624, "rewards/margins": 0.04492032527923584, "rewards/rejected": -0.1841564178466797, "step": 510 }, { "epoch": 1.3473961766644693, "grad_norm": 9.78370189666748, "learning_rate": 4.443655723158828e-06, "log_odds_chosen": 0.6639434099197388, "log_odds_ratio": -0.45992475748062134, "logits/chosen": -1.0509096384048462, "logits/rejected": -0.9572964310646057, "logps/chosen": -1.3928582668304443, "logps/rejected": -1.928267478942871, "loss": 2.2199, "nll_loss": 0.5089808702468872, "rewards/accuracies": 1.0, "rewards/chosen": -0.1392858326435089, "rewards/margins": 0.053540922701358795, "rewards/rejected": -0.1928267627954483, "step": 511 }, { "epoch": 1.3500329597890572, "grad_norm": 9.828688621520996, "learning_rate": 4.436557231588288e-06, "log_odds_chosen": 0.5940926671028137, "log_odds_ratio": -0.5060535669326782, "logits/chosen": -1.0177522897720337, "logits/rejected": -0.9550021886825562, "logps/chosen": -1.2975274324417114, "logps/rejected": -1.7164931297302246, "loss": 2.29, "nll_loss": 0.5219029188156128, "rewards/accuracies": 0.875, "rewards/chosen": -0.1297527402639389, "rewards/margins": 0.041896574199199677, "rewards/rejected": -0.17164930701255798, "step": 512 }, { "epoch": 1.3526697429136454, "grad_norm": 9.234590530395508, "learning_rate": 4.429458740017746e-06, "log_odds_chosen": 0.46330970525741577, "log_odds_ratio": -0.5083400011062622, "logits/chosen": -1.0437920093536377, "logits/rejected": -0.9815811514854431, "logps/chosen": -1.2863333225250244, "logps/rejected": -1.6402312517166138, "loss": 2.0998, "nll_loss": 0.47411662340164185, "rewards/accuracies": 0.875, "rewards/chosen": -0.1286333203315735, "rewards/margins": 0.03538981080055237, "rewards/rejected": -0.16402314603328705, "step": 513 }, { "epoch": 1.3553065260382333, "grad_norm": 9.480605125427246, "learning_rate": 4.4223602484472055e-06, "log_odds_chosen": 0.4388739764690399, "log_odds_ratio": -0.5134546756744385, "logits/chosen": -1.045832872390747, "logits/rejected": -0.9835963249206543, "logps/chosen": -1.331667184829712, "logps/rejected": -1.685315728187561, "loss": 2.0702, "nll_loss": 0.46619364619255066, "rewards/accuracies": 0.875, "rewards/chosen": -0.13316671550273895, "rewards/margins": 0.03536485508084297, "rewards/rejected": -0.16853156685829163, "step": 514 }, { "epoch": 1.3579433091628212, "grad_norm": 9.261907577514648, "learning_rate": 4.4152617568766635e-06, "log_odds_chosen": 0.5002005100250244, "log_odds_ratio": -0.48809945583343506, "logits/chosen": -1.0778452157974243, "logits/rejected": -1.015285611152649, "logps/chosen": -1.3214576244354248, "logps/rejected": -1.7165311574935913, "loss": 2.4748, "nll_loss": 0.5698955059051514, "rewards/accuracies": 1.0, "rewards/chosen": -0.13214576244354248, "rewards/margins": 0.03950735554099083, "rewards/rejected": -0.1716531217098236, "step": 515 }, { "epoch": 1.3605800922874094, "grad_norm": 10.901313781738281, "learning_rate": 4.4081632653061216e-06, "log_odds_chosen": 0.4575445055961609, "log_odds_ratio": -0.49447035789489746, "logits/chosen": -1.136452078819275, "logits/rejected": -1.0185503959655762, "logps/chosen": -1.5689027309417725, "logps/rejected": -1.9473178386688232, "loss": 2.9467, "nll_loss": 0.6872245669364929, "rewards/accuracies": 1.0, "rewards/chosen": -0.15689027309417725, "rewards/margins": 0.03784151002764702, "rewards/rejected": -0.19473177194595337, "step": 516 }, { "epoch": 1.3632168754119973, "grad_norm": 11.190934181213379, "learning_rate": 4.401064773735581e-06, "log_odds_chosen": 0.5212029218673706, "log_odds_ratio": -0.47070425748825073, "logits/chosen": -1.108872890472412, "logits/rejected": -1.0137286186218262, "logps/chosen": -1.5634641647338867, "logps/rejected": -1.9933662414550781, "loss": 2.7669, "nll_loss": 0.6446561813354492, "rewards/accuracies": 1.0, "rewards/chosen": -0.1563464105129242, "rewards/margins": 0.042990200221538544, "rewards/rejected": -0.19933661818504333, "step": 517 }, { "epoch": 1.3658536585365852, "grad_norm": 9.640596389770508, "learning_rate": 4.393966282165039e-06, "log_odds_chosen": 0.3897729218006134, "log_odds_ratio": -0.5306410193443298, "logits/chosen": -1.0666706562042236, "logits/rejected": -1.018998146057129, "logps/chosen": -1.2988317012786865, "logps/rejected": -1.5959352254867554, "loss": 2.1505, "nll_loss": 0.4845691919326782, "rewards/accuracies": 0.875, "rewards/chosen": -0.12988317012786865, "rewards/margins": 0.029710350558161736, "rewards/rejected": -0.15959352254867554, "step": 518 }, { "epoch": 1.3684904416611734, "grad_norm": 9.896682739257812, "learning_rate": 4.386867790594498e-06, "log_odds_chosen": 0.422545850276947, "log_odds_ratio": -0.5287402272224426, "logits/chosen": -0.9611405730247498, "logits/rejected": -0.926899790763855, "logps/chosen": -1.270667552947998, "logps/rejected": -1.5901453495025635, "loss": 2.1228, "nll_loss": 0.4778139591217041, "rewards/accuracies": 0.75, "rewards/chosen": -0.12706676125526428, "rewards/margins": 0.031947776675224304, "rewards/rejected": -0.15901455283164978, "step": 519 }, { "epoch": 1.3711272247857613, "grad_norm": 9.610637664794922, "learning_rate": 4.379769299023957e-06, "log_odds_chosen": 0.5789295434951782, "log_odds_ratio": -0.4628753364086151, "logits/chosen": -1.0623466968536377, "logits/rejected": -0.9665452837944031, "logps/chosen": -1.2811853885650635, "logps/rejected": -1.7341910600662231, "loss": 2.0906, "nll_loss": 0.47636866569519043, "rewards/accuracies": 1.0, "rewards/chosen": -0.12811852991580963, "rewards/margins": 0.04530058056116104, "rewards/rejected": -0.17341911792755127, "step": 520 }, { "epoch": 1.3737640079103492, "grad_norm": 9.621949195861816, "learning_rate": 4.372670807453416e-06, "log_odds_chosen": 0.2818030118942261, "log_odds_ratio": -0.5760889053344727, "logits/chosen": -1.117762804031372, "logits/rejected": -1.0882344245910645, "logps/chosen": -1.3426880836486816, "logps/rejected": -1.5510759353637695, "loss": 2.2966, "nll_loss": 0.5165323615074158, "rewards/accuracies": 0.75, "rewards/chosen": -0.13426882028579712, "rewards/margins": 0.020838771015405655, "rewards/rejected": -0.15510760247707367, "step": 521 }, { "epoch": 1.3764007910349374, "grad_norm": 11.08607292175293, "learning_rate": 4.365572315882875e-06, "log_odds_chosen": 0.4651279151439667, "log_odds_ratio": -0.5054472088813782, "logits/chosen": -1.1419415473937988, "logits/rejected": -1.0225671529769897, "logps/chosen": -1.720205545425415, "logps/rejected": -2.105118989944458, "loss": 3.356, "nll_loss": 0.7884448766708374, "rewards/accuracies": 0.75, "rewards/chosen": -0.1720205545425415, "rewards/margins": 0.03849135339260101, "rewards/rejected": -0.2105119228363037, "step": 522 }, { "epoch": 1.3790375741595253, "grad_norm": 9.302968978881836, "learning_rate": 4.358473824312334e-06, "log_odds_chosen": 0.49724501371383667, "log_odds_ratio": -0.48611587285995483, "logits/chosen": -1.0597364902496338, "logits/rejected": -0.9594701528549194, "logps/chosen": -1.234031319618225, "logps/rejected": -1.5941500663757324, "loss": 2.0146, "nll_loss": 0.4550420045852661, "rewards/accuracies": 1.0, "rewards/chosen": -0.1234031394124031, "rewards/margins": 0.036011867225170135, "rewards/rejected": -0.15941500663757324, "step": 523 }, { "epoch": 1.3816743572841133, "grad_norm": 9.23997688293457, "learning_rate": 4.351375332741792e-06, "log_odds_chosen": 0.5455493927001953, "log_odds_ratio": -0.4641241133213043, "logits/chosen": -1.0696187019348145, "logits/rejected": -0.96488356590271, "logps/chosen": -1.2787021398544312, "logps/rejected": -1.6961209774017334, "loss": 1.9613, "nll_loss": 0.4439108073711395, "rewards/accuracies": 1.0, "rewards/chosen": -0.12787020206451416, "rewards/margins": 0.04174188897013664, "rewards/rejected": -0.1696121096611023, "step": 524 }, { "epoch": 1.3843111404087014, "grad_norm": 10.52160930633545, "learning_rate": 4.344276841171251e-06, "log_odds_chosen": 0.1157563179731369, "log_odds_ratio": -0.6474640965461731, "logits/chosen": -1.0222586393356323, "logits/rejected": -0.9553705453872681, "logps/chosen": -1.728295087814331, "logps/rejected": -1.821520209312439, "loss": 2.8794, "nll_loss": 0.6551076769828796, "rewards/accuracies": 0.75, "rewards/chosen": -0.1728295087814331, "rewards/margins": 0.009322520345449448, "rewards/rejected": -0.18215203285217285, "step": 525 }, { "epoch": 1.3869479235332893, "grad_norm": 8.591838836669922, "learning_rate": 4.33717834960071e-06, "log_odds_chosen": 0.7126994132995605, "log_odds_ratio": -0.4387003481388092, "logits/chosen": -1.038390874862671, "logits/rejected": -1.0139986276626587, "logps/chosen": -1.054718017578125, "logps/rejected": -1.5185861587524414, "loss": 1.7589, "nll_loss": 0.395847350358963, "rewards/accuracies": 0.875, "rewards/chosen": -0.10547180473804474, "rewards/margins": 0.04638680815696716, "rewards/rejected": -0.1518586277961731, "step": 526 }, { "epoch": 1.3895847066578773, "grad_norm": 9.613905906677246, "learning_rate": 4.330079858030168e-06, "log_odds_chosen": 0.5708151459693909, "log_odds_ratio": -0.4756304621696472, "logits/chosen": -1.0896623134613037, "logits/rejected": -0.9856705665588379, "logps/chosen": -1.3225669860839844, "logps/rejected": -1.78826904296875, "loss": 2.3778, "nll_loss": 0.5468798279762268, "rewards/accuracies": 1.0, "rewards/chosen": -0.13225671648979187, "rewards/margins": 0.04657018184661865, "rewards/rejected": -0.17882689833641052, "step": 527 }, { "epoch": 1.3922214897824654, "grad_norm": 8.894762992858887, "learning_rate": 4.3229813664596275e-06, "log_odds_chosen": 0.5077429413795471, "log_odds_ratio": -0.476772665977478, "logits/chosen": -1.020542860031128, "logits/rejected": -0.9697209596633911, "logps/chosen": -1.1626381874084473, "logps/rejected": -1.54520583152771, "loss": 1.6572, "nll_loss": 0.36661040782928467, "rewards/accuracies": 1.0, "rewards/chosen": -0.11626380681991577, "rewards/margins": 0.03825676441192627, "rewards/rejected": -0.15452057123184204, "step": 528 }, { "epoch": 1.3948582729070533, "grad_norm": 9.532736778259277, "learning_rate": 4.3158828748890856e-06, "log_odds_chosen": 0.5159969925880432, "log_odds_ratio": -0.482917457818985, "logits/chosen": -0.9628971815109253, "logits/rejected": -0.9138011336326599, "logps/chosen": -1.3670532703399658, "logps/rejected": -1.7700910568237305, "loss": 2.2592, "nll_loss": 0.5165103077888489, "rewards/accuracies": 0.875, "rewards/chosen": -0.13670533895492554, "rewards/margins": 0.04030377417802811, "rewards/rejected": -0.17700910568237305, "step": 529 }, { "epoch": 1.3974950560316415, "grad_norm": 10.507089614868164, "learning_rate": 4.3087843833185445e-06, "log_odds_chosen": 0.26568925380706787, "log_odds_ratio": -0.5747084021568298, "logits/chosen": -1.159207820892334, "logits/rejected": -1.0467674732208252, "logps/chosen": -1.3419283628463745, "logps/rejected": -1.5456748008728027, "loss": 2.7376, "nll_loss": 0.6269404292106628, "rewards/accuracies": 0.875, "rewards/chosen": -0.1341928392648697, "rewards/margins": 0.02037465199828148, "rewards/rejected": -0.15456749498844147, "step": 530 }, { "epoch": 1.4001318391562294, "grad_norm": 9.721789360046387, "learning_rate": 4.301685891748003e-06, "log_odds_chosen": 0.36365044116973877, "log_odds_ratio": -0.548660397529602, "logits/chosen": -1.0611029863357544, "logits/rejected": -1.0090358257293701, "logps/chosen": -1.3427720069885254, "logps/rejected": -1.5878149271011353, "loss": 2.2581, "nll_loss": 0.5096607208251953, "rewards/accuracies": 0.875, "rewards/chosen": -0.13427719473838806, "rewards/margins": 0.024504294618964195, "rewards/rejected": -0.158781498670578, "step": 531 }, { "epoch": 1.4027686222808173, "grad_norm": 9.990262985229492, "learning_rate": 4.294587400177462e-06, "log_odds_chosen": 0.2077985554933548, "log_odds_ratio": -0.605369508266449, "logits/chosen": -1.0180903673171997, "logits/rejected": -0.9370006918907166, "logps/chosen": -1.3981273174285889, "logps/rejected": -1.5546506643295288, "loss": 2.0418, "nll_loss": 0.44991248846054077, "rewards/accuracies": 0.75, "rewards/chosen": -0.13981273770332336, "rewards/margins": 0.015652332454919815, "rewards/rejected": -0.15546506643295288, "step": 532 }, { "epoch": 1.4054054054054055, "grad_norm": 9.432575225830078, "learning_rate": 4.287488908606921e-06, "log_odds_chosen": 0.6490946412086487, "log_odds_ratio": -0.44073981046676636, "logits/chosen": -1.0299392938613892, "logits/rejected": -0.9620039463043213, "logps/chosen": -1.2669686079025269, "logps/rejected": -1.7586071491241455, "loss": 2.1131, "nll_loss": 0.48419713973999023, "rewards/accuracies": 1.0, "rewards/chosen": -0.1266968548297882, "rewards/margins": 0.04916385933756828, "rewards/rejected": -0.1758607178926468, "step": 533 }, { "epoch": 1.4080421885299934, "grad_norm": 9.760756492614746, "learning_rate": 4.280390417036379e-06, "log_odds_chosen": 0.5309910774230957, "log_odds_ratio": -0.47286418080329895, "logits/chosen": -1.0874316692352295, "logits/rejected": -0.9884422421455383, "logps/chosen": -1.1690242290496826, "logps/rejected": -1.5218350887298584, "loss": 2.1617, "nll_loss": 0.4931284189224243, "rewards/accuracies": 1.0, "rewards/chosen": -0.1169024258852005, "rewards/margins": 0.035281069576740265, "rewards/rejected": -0.15218350291252136, "step": 534 }, { "epoch": 1.4106789716545813, "grad_norm": 11.032233238220215, "learning_rate": 4.273291925465838e-06, "log_odds_chosen": 0.32740920782089233, "log_odds_ratio": -0.5480507016181946, "logits/chosen": -1.1221764087677002, "logits/rejected": -1.0313825607299805, "logps/chosen": -1.4779345989227295, "logps/rejected": -1.7428152561187744, "loss": 3.2183, "nll_loss": 0.7497601509094238, "rewards/accuracies": 1.0, "rewards/chosen": -0.1477934718132019, "rewards/margins": 0.026488065719604492, "rewards/rejected": -0.1742815375328064, "step": 535 }, { "epoch": 1.4133157547791695, "grad_norm": 10.23257827758789, "learning_rate": 4.266193433895297e-06, "log_odds_chosen": 0.5347425937652588, "log_odds_ratio": -0.4779736399650574, "logits/chosen": -1.0907363891601562, "logits/rejected": -0.9816246032714844, "logps/chosen": -1.4487974643707275, "logps/rejected": -1.8768093585968018, "loss": 2.5063, "nll_loss": 0.5787882804870605, "rewards/accuracies": 0.875, "rewards/chosen": -0.14487972855567932, "rewards/margins": 0.04280121624469757, "rewards/rejected": -0.18768095970153809, "step": 536 }, { "epoch": 1.4159525379037574, "grad_norm": 10.971465110778809, "learning_rate": 4.259094942324756e-06, "log_odds_chosen": 0.4730498492717743, "log_odds_ratio": -0.49586889147758484, "logits/chosen": -1.0929844379425049, "logits/rejected": -0.9882125854492188, "logps/chosen": -1.4657291173934937, "logps/rejected": -1.8439651727676392, "loss": 2.9872, "nll_loss": 0.6972134113311768, "rewards/accuracies": 0.875, "rewards/chosen": -0.14657291769981384, "rewards/margins": 0.037823598831892014, "rewards/rejected": -0.18439652025699615, "step": 537 }, { "epoch": 1.4185893210283453, "grad_norm": 9.797980308532715, "learning_rate": 4.251996450754214e-06, "log_odds_chosen": 0.24408963322639465, "log_odds_ratio": -0.5894317626953125, "logits/chosen": -1.027978777885437, "logits/rejected": -0.9461379051208496, "logps/chosen": -1.6278281211853027, "logps/rejected": -1.8245741128921509, "loss": 2.4087, "nll_loss": 0.5432250499725342, "rewards/accuracies": 0.875, "rewards/chosen": -0.16278283298015594, "rewards/margins": 0.01967458799481392, "rewards/rejected": -0.18245741724967957, "step": 538 }, { "epoch": 1.4212261041529335, "grad_norm": 10.824618339538574, "learning_rate": 4.244897959183674e-06, "log_odds_chosen": 0.3454328179359436, "log_odds_ratio": -0.547885000705719, "logits/chosen": -1.0612674951553345, "logits/rejected": -1.0093002319335938, "logps/chosen": -1.5636953115463257, "logps/rejected": -1.837222695350647, "loss": 2.5403, "nll_loss": 0.580278217792511, "rewards/accuracies": 0.875, "rewards/chosen": -0.15636955201625824, "rewards/margins": 0.027352729812264442, "rewards/rejected": -0.18372228741645813, "step": 539 }, { "epoch": 1.4238628872775214, "grad_norm": 8.917954444885254, "learning_rate": 4.237799467613132e-06, "log_odds_chosen": 0.5565085411071777, "log_odds_ratio": -0.4850555658340454, "logits/chosen": -1.04508638381958, "logits/rejected": -0.9539642333984375, "logps/chosen": -1.1825885772705078, "logps/rejected": -1.594106912612915, "loss": 1.8202, "nll_loss": 0.4065341353416443, "rewards/accuracies": 0.875, "rewards/chosen": -0.11825884878635406, "rewards/margins": 0.04115184396505356, "rewards/rejected": -0.15941071510314941, "step": 540 }, { "epoch": 1.4264996704021096, "grad_norm": 9.302490234375, "learning_rate": 4.2307009760425915e-06, "log_odds_chosen": 0.8080810904502869, "log_odds_ratio": -0.4509783089160919, "logits/chosen": -1.0430349111557007, "logits/rejected": -0.9668057560920715, "logps/chosen": -1.3211041688919067, "logps/rejected": -1.9904488325119019, "loss": 2.0318, "nll_loss": 0.46285194158554077, "rewards/accuracies": 0.75, "rewards/chosen": -0.13211041688919067, "rewards/margins": 0.0669344812631607, "rewards/rejected": -0.19904489815235138, "step": 541 }, { "epoch": 1.4291364535266975, "grad_norm": 10.374223709106445, "learning_rate": 4.2236024844720496e-06, "log_odds_chosen": 0.27454304695129395, "log_odds_ratio": -0.5771620273590088, "logits/chosen": -1.1492888927459717, "logits/rejected": -1.0261468887329102, "logps/chosen": -1.4748188257217407, "logps/rejected": -1.6907557249069214, "loss": 2.8768, "nll_loss": 0.661479651927948, "rewards/accuracies": 0.75, "rewards/chosen": -0.14748188853263855, "rewards/margins": 0.021593693643808365, "rewards/rejected": -0.16907557845115662, "step": 542 }, { "epoch": 1.4317732366512854, "grad_norm": 10.054798126220703, "learning_rate": 4.216503992901508e-06, "log_odds_chosen": 0.5260477066040039, "log_odds_ratio": -0.47824597358703613, "logits/chosen": -1.104248046875, "logits/rejected": -1.0554959774017334, "logps/chosen": -1.3430862426757812, "logps/rejected": -1.7557330131530762, "loss": 2.4337, "nll_loss": 0.5605974197387695, "rewards/accuracies": 1.0, "rewards/chosen": -0.13430863618850708, "rewards/margins": 0.04126469045877457, "rewards/rejected": -0.17557331919670105, "step": 543 }, { "epoch": 1.4344100197758736, "grad_norm": 9.858833312988281, "learning_rate": 4.209405501330967e-06, "log_odds_chosen": 0.4538835883140564, "log_odds_ratio": -0.49726781249046326, "logits/chosen": -1.0619155168533325, "logits/rejected": -0.9338789582252502, "logps/chosen": -1.4854230880737305, "logps/rejected": -1.848534345626831, "loss": 2.51, "nll_loss": 0.5777809023857117, "rewards/accuracies": 1.0, "rewards/chosen": -0.14854231476783752, "rewards/margins": 0.036311134696006775, "rewards/rejected": -0.1848534345626831, "step": 544 }, { "epoch": 1.4370468029004615, "grad_norm": 10.137702941894531, "learning_rate": 4.202307009760425e-06, "log_odds_chosen": 0.7637728452682495, "log_odds_ratio": -0.44855618476867676, "logits/chosen": -1.0617544651031494, "logits/rejected": -0.960517406463623, "logps/chosen": -1.394372820854187, "logps/rejected": -2.043037176132202, "loss": 2.3223, "nll_loss": 0.5357198715209961, "rewards/accuracies": 1.0, "rewards/chosen": -0.13943728804588318, "rewards/margins": 0.06486643850803375, "rewards/rejected": -0.20430371165275574, "step": 545 }, { "epoch": 1.4396835860250494, "grad_norm": 9.720535278320312, "learning_rate": 4.195208518189884e-06, "log_odds_chosen": 0.7124000191688538, "log_odds_ratio": -0.41061651706695557, "logits/chosen": -1.0555087327957153, "logits/rejected": -0.9663803577423096, "logps/chosen": -1.3942837715148926, "logps/rejected": -1.9674293994903564, "loss": 2.0966, "nll_loss": 0.4830893576145172, "rewards/accuracies": 1.0, "rewards/chosen": -0.13942837715148926, "rewards/margins": 0.057314563542604446, "rewards/rejected": -0.1967429518699646, "step": 546 }, { "epoch": 1.4423203691496376, "grad_norm": 9.361237525939941, "learning_rate": 4.188110026619343e-06, "log_odds_chosen": 0.46525129675865173, "log_odds_ratio": -0.5079076290130615, "logits/chosen": -1.0968852043151855, "logits/rejected": -1.0257980823516846, "logps/chosen": -1.176173448562622, "logps/rejected": -1.51423978805542, "loss": 2.3172, "nll_loss": 0.5285149812698364, "rewards/accuracies": 1.0, "rewards/chosen": -0.11761735379695892, "rewards/margins": 0.033806636929512024, "rewards/rejected": -0.15142399072647095, "step": 547 }, { "epoch": 1.4449571522742255, "grad_norm": 10.04572868347168, "learning_rate": 4.181011535048802e-06, "log_odds_chosen": 0.4429911971092224, "log_odds_ratio": -0.5006264448165894, "logits/chosen": -1.0217193365097046, "logits/rejected": -0.9494048357009888, "logps/chosen": -1.330000638961792, "logps/rejected": -1.6583576202392578, "loss": 2.1335, "nll_loss": 0.4833093583583832, "rewards/accuracies": 1.0, "rewards/chosen": -0.13300006091594696, "rewards/margins": 0.0328357107937336, "rewards/rejected": -0.16583578288555145, "step": 548 }, { "epoch": 1.4475939353988134, "grad_norm": 10.25400447845459, "learning_rate": 4.17391304347826e-06, "log_odds_chosen": 0.7248557806015015, "log_odds_ratio": -0.4106558561325073, "logits/chosen": -1.019441843032837, "logits/rejected": -0.9044776558876038, "logps/chosen": -1.3269296884536743, "logps/rejected": -1.9024122953414917, "loss": 2.1889, "nll_loss": 0.5061638951301575, "rewards/accuracies": 1.0, "rewards/chosen": -0.13269296288490295, "rewards/margins": 0.057548269629478455, "rewards/rejected": -0.1902412325143814, "step": 549 }, { "epoch": 1.4502307185234016, "grad_norm": 10.040661811828613, "learning_rate": 4.16681455190772e-06, "log_odds_chosen": 0.36954087018966675, "log_odds_ratio": -0.536496639251709, "logits/chosen": -1.0806694030761719, "logits/rejected": -0.9924178123474121, "logps/chosen": -1.3371779918670654, "logps/rejected": -1.626185417175293, "loss": 2.1673, "nll_loss": 0.4881811738014221, "rewards/accuracies": 1.0, "rewards/chosen": -0.13371780514717102, "rewards/margins": 0.028900746256113052, "rewards/rejected": -0.16261856257915497, "step": 550 }, { "epoch": 1.4528675016479895, "grad_norm": 10.27868938446045, "learning_rate": 4.159716060337178e-06, "log_odds_chosen": 0.6784330010414124, "log_odds_ratio": -0.41786181926727295, "logits/chosen": -1.0871484279632568, "logits/rejected": -1.0069628953933716, "logps/chosen": -1.372645378112793, "logps/rejected": -1.9133880138397217, "loss": 2.4847, "nll_loss": 0.5793927907943726, "rewards/accuracies": 1.0, "rewards/chosen": -0.13726454973220825, "rewards/margins": 0.05407426133751869, "rewards/rejected": -0.19133880734443665, "step": 551 }, { "epoch": 1.4555042847725774, "grad_norm": 9.83980655670166, "learning_rate": 4.152617568766637e-06, "log_odds_chosen": 0.744712233543396, "log_odds_ratio": -0.4128277897834778, "logits/chosen": -1.1573163270950317, "logits/rejected": -0.9828042984008789, "logps/chosen": -1.256611943244934, "logps/rejected": -1.8465502262115479, "loss": 2.3454, "nll_loss": 0.5450708866119385, "rewards/accuracies": 1.0, "rewards/chosen": -0.1256611943244934, "rewards/margins": 0.058993831276893616, "rewards/rejected": -0.18465502560138702, "step": 552 }, { "epoch": 1.4581410678971656, "grad_norm": 9.363921165466309, "learning_rate": 4.145519077196096e-06, "log_odds_chosen": 0.5384882688522339, "log_odds_ratio": -0.4847624897956848, "logits/chosen": -1.004069447517395, "logits/rejected": -0.9431107044219971, "logps/chosen": -1.204410195350647, "logps/rejected": -1.5975055694580078, "loss": 1.843, "nll_loss": 0.4122834801673889, "rewards/accuracies": 0.75, "rewards/chosen": -0.1204410195350647, "rewards/margins": 0.039309531450271606, "rewards/rejected": -0.1597505509853363, "step": 553 }, { "epoch": 1.4607778510217535, "grad_norm": 10.366519927978516, "learning_rate": 4.138420585625554e-06, "log_odds_chosen": 0.45709162950515747, "log_odds_ratio": -0.493826687335968, "logits/chosen": -1.0873271226882935, "logits/rejected": -0.9958299398422241, "logps/chosen": -1.5129612684249878, "logps/rejected": -1.873511791229248, "loss": 2.9732, "nll_loss": 0.6939066052436829, "rewards/accuracies": 1.0, "rewards/chosen": -0.15129612386226654, "rewards/margins": 0.0360550582408905, "rewards/rejected": -0.18735116720199585, "step": 554 }, { "epoch": 1.4634146341463414, "grad_norm": 10.013711929321289, "learning_rate": 4.1313220940550136e-06, "log_odds_chosen": 0.5878893136978149, "log_odds_ratio": -0.44947680830955505, "logits/chosen": -1.0896657705307007, "logits/rejected": -1.0171089172363281, "logps/chosen": -1.387203335762024, "logps/rejected": -1.8584654331207275, "loss": 2.221, "nll_loss": 0.5102908611297607, "rewards/accuracies": 1.0, "rewards/chosen": -0.1387203335762024, "rewards/margins": 0.04712621867656708, "rewards/rejected": -0.18584656715393066, "step": 555 }, { "epoch": 1.4660514172709296, "grad_norm": 10.238524436950684, "learning_rate": 4.124223602484472e-06, "log_odds_chosen": 0.3882477879524231, "log_odds_ratio": -0.5345123410224915, "logits/chosen": -1.0624442100524902, "logits/rejected": -1.014449119567871, "logps/chosen": -1.318690299987793, "logps/rejected": -1.590446949005127, "loss": 2.6513, "nll_loss": 0.609370231628418, "rewards/accuracies": 0.875, "rewards/chosen": -0.13186903297901154, "rewards/margins": 0.027175655588507652, "rewards/rejected": -0.15904468297958374, "step": 556 }, { "epoch": 1.4686882003955175, "grad_norm": 10.251365661621094, "learning_rate": 4.1171251109139305e-06, "log_odds_chosen": 0.6487367153167725, "log_odds_ratio": -0.44119197130203247, "logits/chosen": -0.9631329774856567, "logits/rejected": -0.9411216974258423, "logps/chosen": -1.4211435317993164, "logps/rejected": -1.9619696140289307, "loss": 1.8292, "nll_loss": 0.4131847620010376, "rewards/accuracies": 0.875, "rewards/chosen": -0.14211437106132507, "rewards/margins": 0.05408259108662605, "rewards/rejected": -0.19619694352149963, "step": 557 }, { "epoch": 1.4713249835201054, "grad_norm": 9.43930721282959, "learning_rate": 4.110026619343389e-06, "log_odds_chosen": 0.6696327924728394, "log_odds_ratio": -0.4534551799297333, "logits/chosen": -1.0974669456481934, "logits/rejected": -0.9728105068206787, "logps/chosen": -1.215304970741272, "logps/rejected": -1.702378749847412, "loss": 2.1915, "nll_loss": 0.5025386214256287, "rewards/accuracies": 1.0, "rewards/chosen": -0.12153048813343048, "rewards/margins": 0.04870738089084625, "rewards/rejected": -0.17023788392543793, "step": 558 }, { "epoch": 1.4739617666446936, "grad_norm": 9.472445487976074, "learning_rate": 4.102928127772848e-06, "log_odds_chosen": 0.45684900879859924, "log_odds_ratio": -0.5050070881843567, "logits/chosen": -1.0497395992279053, "logits/rejected": -0.9759555459022522, "logps/chosen": -1.3387244939804077, "logps/rejected": -1.6942470073699951, "loss": 1.9954, "nll_loss": 0.44833865761756897, "rewards/accuracies": 0.875, "rewards/chosen": -0.13387244939804077, "rewards/margins": 0.0355522483587265, "rewards/rejected": -0.16942471265792847, "step": 559 }, { "epoch": 1.4765985497692815, "grad_norm": 10.136690139770508, "learning_rate": 4.095829636202307e-06, "log_odds_chosen": 0.5725353956222534, "log_odds_ratio": -0.4533199667930603, "logits/chosen": -1.0816022157669067, "logits/rejected": -0.9956217408180237, "logps/chosen": -1.3826810121536255, "logps/rejected": -1.8414275646209717, "loss": 2.3122, "nll_loss": 0.5327115058898926, "rewards/accuracies": 1.0, "rewards/chosen": -0.1382680982351303, "rewards/margins": 0.04587465897202492, "rewards/rejected": -0.18414275348186493, "step": 560 }, { "epoch": 1.4792353328938694, "grad_norm": 10.132454872131348, "learning_rate": 4.088731144631765e-06, "log_odds_chosen": 0.4786339998245239, "log_odds_ratio": -0.5015438199043274, "logits/chosen": -1.1128901243209839, "logits/rejected": -1.0538095235824585, "logps/chosen": -1.345731496810913, "logps/rejected": -1.7082313299179077, "loss": 2.3008, "nll_loss": 0.5250552296638489, "rewards/accuracies": 0.75, "rewards/chosen": -0.13457316160202026, "rewards/margins": 0.03624997287988663, "rewards/rejected": -0.1708231270313263, "step": 561 }, { "epoch": 1.4818721160184576, "grad_norm": 8.780614852905273, "learning_rate": 4.081632653061224e-06, "log_odds_chosen": 0.8551339507102966, "log_odds_ratio": -0.38810813426971436, "logits/chosen": -0.9988934397697449, "logits/rejected": -0.9784669876098633, "logps/chosen": -1.1678969860076904, "logps/rejected": -1.8136950731277466, "loss": 1.6219, "nll_loss": 0.36667054891586304, "rewards/accuracies": 1.0, "rewards/chosen": -0.11678969860076904, "rewards/margins": 0.0645797997713089, "rewards/rejected": -0.18136951327323914, "step": 562 }, { "epoch": 1.4845088991430455, "grad_norm": 9.880027770996094, "learning_rate": 4.074534161490683e-06, "log_odds_chosen": 0.42561155557632446, "log_odds_ratio": -0.5181151032447815, "logits/chosen": -1.0898230075836182, "logits/rejected": -1.0091991424560547, "logps/chosen": -1.339963436126709, "logps/rejected": -1.6795657873153687, "loss": 2.1117, "nll_loss": 0.47612476348876953, "rewards/accuracies": 0.875, "rewards/chosen": -0.13399635255336761, "rewards/margins": 0.03396022692322731, "rewards/rejected": -0.16795656085014343, "step": 563 }, { "epoch": 1.4871456822676334, "grad_norm": 9.44789981842041, "learning_rate": 4.067435669920142e-06, "log_odds_chosen": 0.31164228916168213, "log_odds_ratio": -0.5770187973976135, "logits/chosen": -1.0990424156188965, "logits/rejected": -0.9864567518234253, "logps/chosen": -1.3892393112182617, "logps/rejected": -1.6200555562973022, "loss": 2.4427, "nll_loss": 0.5529693961143494, "rewards/accuracies": 0.625, "rewards/chosen": -0.13892394304275513, "rewards/margins": 0.023081636056303978, "rewards/rejected": -0.16200555860996246, "step": 564 }, { "epoch": 1.4897824653922216, "grad_norm": 9.407962799072266, "learning_rate": 4.0603371783496e-06, "log_odds_chosen": 0.4963398277759552, "log_odds_ratio": -0.4997844099998474, "logits/chosen": -1.1170499324798584, "logits/rejected": -1.025063157081604, "logps/chosen": -1.3016924858093262, "logps/rejected": -1.642601728439331, "loss": 2.4696, "nll_loss": 0.5674134492874146, "rewards/accuracies": 0.875, "rewards/chosen": -0.13016925752162933, "rewards/margins": 0.034090910106897354, "rewards/rejected": -0.1642601639032364, "step": 565 }, { "epoch": 1.4924192485168095, "grad_norm": 9.735568046569824, "learning_rate": 4.05323868677906e-06, "log_odds_chosen": 0.46814143657684326, "log_odds_ratio": -0.49939918518066406, "logits/chosen": -1.1061391830444336, "logits/rejected": -0.9936612844467163, "logps/chosen": -1.3603556156158447, "logps/rejected": -1.7409652471542358, "loss": 2.2104, "nll_loss": 0.5026587843894958, "rewards/accuracies": 0.875, "rewards/chosen": -0.13603554666042328, "rewards/margins": 0.03806097060441971, "rewards/rejected": -0.17409652471542358, "step": 566 }, { "epoch": 1.4950560316413974, "grad_norm": 9.404352188110352, "learning_rate": 4.046140195208518e-06, "log_odds_chosen": 0.5777267813682556, "log_odds_ratio": -0.45638012886047363, "logits/chosen": -1.0536627769470215, "logits/rejected": -0.9947332143783569, "logps/chosen": -1.2639999389648438, "logps/rejected": -1.7096264362335205, "loss": 2.0162, "nll_loss": 0.4584140181541443, "rewards/accuracies": 1.0, "rewards/chosen": -0.12639999389648438, "rewards/margins": 0.044562652707099915, "rewards/rejected": -0.1709626317024231, "step": 567 }, { "epoch": 1.4976928147659856, "grad_norm": 9.94635009765625, "learning_rate": 4.039041703637977e-06, "log_odds_chosen": 0.3626604974269867, "log_odds_ratio": -0.54862380027771, "logits/chosen": -1.0114367008209229, "logits/rejected": -0.8889374732971191, "logps/chosen": -1.3621025085449219, "logps/rejected": -1.6648523807525635, "loss": 1.9989, "nll_loss": 0.44487372040748596, "rewards/accuracies": 0.875, "rewards/chosen": -0.13621026277542114, "rewards/margins": 0.03027498535811901, "rewards/rejected": -0.1664852499961853, "step": 568 }, { "epoch": 1.5003295978905735, "grad_norm": 10.303240776062012, "learning_rate": 4.031943212067436e-06, "log_odds_chosen": 0.5516713857650757, "log_odds_ratio": -0.4821397066116333, "logits/chosen": -1.1247400045394897, "logits/rejected": -0.9605581760406494, "logps/chosen": -1.6147916316986084, "logps/rejected": -2.07356333732605, "loss": 3.0803, "nll_loss": 0.7218732237815857, "rewards/accuracies": 1.0, "rewards/chosen": -0.1614791750907898, "rewards/margins": 0.04587716609239578, "rewards/rejected": -0.20735633373260498, "step": 569 }, { "epoch": 1.5003295978905735, "eval_log_odds_chosen": 0.49622219800949097, "eval_log_odds_ratio": -0.49882009625434875, "eval_logits/chosen": -1.0913243293762207, "eval_logits/rejected": -0.9999473690986633, "eval_logps/chosen": -1.379929542541504, "eval_logps/rejected": -1.7691881656646729, "eval_loss": 0.5929180979728699, "eval_nll_loss": 0.5430360436439514, "eval_rewards/accuracies": 0.9053254723548889, "eval_rewards/chosen": -0.1379929631948471, "eval_rewards/margins": 0.03892587497830391, "eval_rewards/rejected": -0.17691883444786072, "eval_runtime": 129.4908, "eval_samples_per_second": 2.61, "eval_steps_per_second": 1.305, "step": 569 }, { "epoch": 1.5029663810151614, "grad_norm": 9.44087028503418, "learning_rate": 4.024844720496894e-06, "log_odds_chosen": 0.5854195952415466, "log_odds_ratio": -0.47436830401420593, "logits/chosen": -1.1242841482162476, "logits/rejected": -0.9996142387390137, "logps/chosen": -1.2071850299835205, "logps/rejected": -1.6707042455673218, "loss": 2.0972, "nll_loss": 0.4768558740615845, "rewards/accuracies": 1.0, "rewards/chosen": -0.12071850150823593, "rewards/margins": 0.046351924538612366, "rewards/rejected": -0.1670704334974289, "step": 570 }, { "epoch": 1.5056031641397496, "grad_norm": 11.15221881866455, "learning_rate": 4.017746228926353e-06, "log_odds_chosen": 0.1619393527507782, "log_odds_ratio": -0.6348941326141357, "logits/chosen": -1.0823087692260742, "logits/rejected": -1.0478005409240723, "logps/chosen": -1.3582721948623657, "logps/rejected": -1.4940277338027954, "loss": 2.7124, "nll_loss": 0.6146115064620972, "rewards/accuracies": 0.625, "rewards/chosen": -0.1358272135257721, "rewards/margins": 0.013575554825365543, "rewards/rejected": -0.14940276741981506, "step": 571 }, { "epoch": 1.5082399472643375, "grad_norm": 10.919987678527832, "learning_rate": 4.0106477373558115e-06, "log_odds_chosen": 0.37946444749832153, "log_odds_ratio": -0.5383967757225037, "logits/chosen": -1.1449512243270874, "logits/rejected": -1.0578222274780273, "logps/chosen": -1.4650253057479858, "logps/rejected": -1.782175898551941, "loss": 2.6081, "nll_loss": 0.5981886386871338, "rewards/accuracies": 0.875, "rewards/chosen": -0.1465025246143341, "rewards/margins": 0.03171507269144058, "rewards/rejected": -0.17821760475635529, "step": 572 }, { "epoch": 1.5108767303889254, "grad_norm": 9.33333969116211, "learning_rate": 4.00354924578527e-06, "log_odds_chosen": 0.3692682981491089, "log_odds_ratio": -0.5407110452651978, "logits/chosen": -0.9620389342308044, "logits/rejected": -0.9305254817008972, "logps/chosen": -1.3830729722976685, "logps/rejected": -1.66476571559906, "loss": 2.3393, "nll_loss": 0.5307450890541077, "rewards/accuracies": 0.625, "rewards/chosen": -0.13830730319023132, "rewards/margins": 0.028169266879558563, "rewards/rejected": -0.1664765626192093, "step": 573 }, { "epoch": 1.5135135135135136, "grad_norm": 10.451208114624023, "learning_rate": 3.996450754214729e-06, "log_odds_chosen": 0.23383724689483643, "log_odds_ratio": -0.5925602912902832, "logits/chosen": -1.1164716482162476, "logits/rejected": -1.0261996984481812, "logps/chosen": -1.4167859554290771, "logps/rejected": -1.5901645421981812, "loss": 2.6467, "nll_loss": 0.6024162173271179, "rewards/accuracies": 0.75, "rewards/chosen": -0.1416786015033722, "rewards/margins": 0.0173378624022007, "rewards/rejected": -0.1590164601802826, "step": 574 }, { "epoch": 1.5161502966381015, "grad_norm": 9.987966537475586, "learning_rate": 3.989352262644188e-06, "log_odds_chosen": 0.3409838080406189, "log_odds_ratio": -0.5625724792480469, "logits/chosen": -1.1221113204956055, "logits/rejected": -0.9838091731071472, "logps/chosen": -1.432151198387146, "logps/rejected": -1.715986728668213, "loss": 2.5528, "nll_loss": 0.5819397568702698, "rewards/accuracies": 0.875, "rewards/chosen": -0.1432151198387146, "rewards/margins": 0.028383558616042137, "rewards/rejected": -0.1715986728668213, "step": 575 }, { "epoch": 1.5187870797626895, "grad_norm": 10.189329147338867, "learning_rate": 3.982253771073646e-06, "log_odds_chosen": 0.294436514377594, "log_odds_ratio": -0.5655215978622437, "logits/chosen": -1.0958954095840454, "logits/rejected": -1.0803096294403076, "logps/chosen": -1.3242357969284058, "logps/rejected": -1.5406522750854492, "loss": 2.6916, "nll_loss": 0.6163361072540283, "rewards/accuracies": 1.0, "rewards/chosen": -0.13242359459400177, "rewards/margins": 0.021641647443175316, "rewards/rejected": -0.15406523644924164, "step": 576 }, { "epoch": 1.5214238628872776, "grad_norm": 10.759533882141113, "learning_rate": 3.975155279503105e-06, "log_odds_chosen": 0.3902096748352051, "log_odds_ratio": -0.5328852534294128, "logits/chosen": -1.082053303718567, "logits/rejected": -1.0068429708480835, "logps/chosen": -1.6502498388290405, "logps/rejected": -1.9628188610076904, "loss": 2.8628, "nll_loss": 0.6624106168746948, "rewards/accuracies": 0.75, "rewards/chosen": -0.165024995803833, "rewards/margins": 0.03125689923763275, "rewards/rejected": -0.19628189504146576, "step": 577 }, { "epoch": 1.5240606460118655, "grad_norm": 9.272303581237793, "learning_rate": 3.968056787932564e-06, "log_odds_chosen": 0.433498740196228, "log_odds_ratio": -0.526934027671814, "logits/chosen": -1.1131141185760498, "logits/rejected": -0.9975295066833496, "logps/chosen": -1.2005765438079834, "logps/rejected": -1.4930449724197388, "loss": 2.2626, "nll_loss": 0.5129634737968445, "rewards/accuracies": 0.875, "rewards/chosen": -0.12005767226219177, "rewards/margins": 0.029246840626001358, "rewards/rejected": -0.14930450916290283, "step": 578 }, { "epoch": 1.5266974291364535, "grad_norm": 9.557279586791992, "learning_rate": 3.960958296362023e-06, "log_odds_chosen": 0.5534155964851379, "log_odds_ratio": -0.47711971402168274, "logits/chosen": -1.162174940109253, "logits/rejected": -1.0576491355895996, "logps/chosen": -1.363930583000183, "logps/rejected": -1.8052783012390137, "loss": 2.5042, "nll_loss": 0.5783388614654541, "rewards/accuracies": 1.0, "rewards/chosen": -0.13639307022094727, "rewards/margins": 0.04413476586341858, "rewards/rejected": -0.18052780628204346, "step": 579 }, { "epoch": 1.5293342122610416, "grad_norm": 10.226247787475586, "learning_rate": 3.953859804791482e-06, "log_odds_chosen": 0.6164693832397461, "log_odds_ratio": -0.477706640958786, "logits/chosen": -1.073448896408081, "logits/rejected": -0.9424384236335754, "logps/chosen": -1.4734885692596436, "logps/rejected": -2.000279426574707, "loss": 2.2613, "nll_loss": 0.5175544619560242, "rewards/accuracies": 0.875, "rewards/chosen": -0.14734885096549988, "rewards/margins": 0.05267908796668053, "rewards/rejected": -0.2000279426574707, "step": 580 }, { "epoch": 1.5319709953856295, "grad_norm": 10.530021667480469, "learning_rate": 3.946761313220941e-06, "log_odds_chosen": 0.30705833435058594, "log_odds_ratio": -0.562725841999054, "logits/chosen": -1.0765693187713623, "logits/rejected": -1.0258369445800781, "logps/chosen": -1.5888984203338623, "logps/rejected": -1.8333849906921387, "loss": 2.9972, "nll_loss": 0.6930161714553833, "rewards/accuracies": 0.875, "rewards/chosen": -0.15888984501361847, "rewards/margins": 0.024448659271001816, "rewards/rejected": -0.1833384931087494, "step": 581 }, { "epoch": 1.5346077785102175, "grad_norm": 9.57894515991211, "learning_rate": 3.9396628216504e-06, "log_odds_chosen": 0.6514328122138977, "log_odds_ratio": -0.4632219672203064, "logits/chosen": -1.1260734796524048, "logits/rejected": -1.0454940795898438, "logps/chosen": -1.3779319524765015, "logps/rejected": -1.9247746467590332, "loss": 2.3697, "nll_loss": 0.5461094379425049, "rewards/accuracies": 1.0, "rewards/chosen": -0.1377931833267212, "rewards/margins": 0.05468427389860153, "rewards/rejected": -0.19247746467590332, "step": 582 }, { "epoch": 1.5372445616348056, "grad_norm": 9.892532348632812, "learning_rate": 3.932564330079858e-06, "log_odds_chosen": 0.5860216617584229, "log_odds_ratio": -0.44781792163848877, "logits/chosen": -1.1517478227615356, "logits/rejected": -1.0297623872756958, "logps/chosen": -1.3620892763137817, "logps/rejected": -1.8288545608520508, "loss": 2.4398, "nll_loss": 0.5651760101318359, "rewards/accuracies": 1.0, "rewards/chosen": -0.1362089365720749, "rewards/margins": 0.046676523983478546, "rewards/rejected": -0.18288546800613403, "step": 583 }, { "epoch": 1.5398813447593935, "grad_norm": 9.396767616271973, "learning_rate": 3.9254658385093166e-06, "log_odds_chosen": 0.46145254373550415, "log_odds_ratio": -0.4998391270637512, "logits/chosen": -1.09555983543396, "logits/rejected": -1.0095515251159668, "logps/chosen": -1.219951868057251, "logps/rejected": -1.5669710636138916, "loss": 2.1258, "nll_loss": 0.4814777970314026, "rewards/accuracies": 1.0, "rewards/chosen": -0.12199518829584122, "rewards/margins": 0.03470191732048988, "rewards/rejected": -0.1566971093416214, "step": 584 }, { "epoch": 1.5425181278839815, "grad_norm": 9.792346954345703, "learning_rate": 3.9183673469387755e-06, "log_odds_chosen": 0.4733620285987854, "log_odds_ratio": -0.5020909309387207, "logits/chosen": -1.044708013534546, "logits/rejected": -0.9730866551399231, "logps/chosen": -1.3304319381713867, "logps/rejected": -1.70212721824646, "loss": 2.0212, "nll_loss": 0.45508724451065063, "rewards/accuracies": 1.0, "rewards/chosen": -0.13304319977760315, "rewards/margins": 0.03716951608657837, "rewards/rejected": -0.1702127307653427, "step": 585 }, { "epoch": 1.5451549110085696, "grad_norm": 10.44377326965332, "learning_rate": 3.911268855368234e-06, "log_odds_chosen": 0.40270447731018066, "log_odds_ratio": -0.5341402292251587, "logits/chosen": -1.0913063287734985, "logits/rejected": -1.0041637420654297, "logps/chosen": -1.365404486656189, "logps/rejected": -1.6835623979568481, "loss": 2.2716, "nll_loss": 0.5144976377487183, "rewards/accuracies": 0.875, "rewards/chosen": -0.13654044270515442, "rewards/margins": 0.031815797090530396, "rewards/rejected": -0.16835623979568481, "step": 586 }, { "epoch": 1.5477916941331575, "grad_norm": 8.901201248168945, "learning_rate": 3.904170363797692e-06, "log_odds_chosen": 0.7658116221427917, "log_odds_ratio": -0.40815699100494385, "logits/chosen": -1.0586168766021729, "logits/rejected": -0.9915466904640198, "logps/chosen": -0.9599915742874146, "logps/rejected": -1.4701061248779297, "loss": 1.775, "nll_loss": 0.4029373228549957, "rewards/accuracies": 1.0, "rewards/chosen": -0.09599915891885757, "rewards/margins": 0.051011450588703156, "rewards/rejected": -0.14701060950756073, "step": 587 }, { "epoch": 1.5504284772577455, "grad_norm": 10.19491195678711, "learning_rate": 3.897071872227151e-06, "log_odds_chosen": 0.4995730221271515, "log_odds_ratio": -0.48944902420043945, "logits/chosen": -1.0612411499023438, "logits/rejected": -1.0142158269882202, "logps/chosen": -1.394554853439331, "logps/rejected": -1.7762850522994995, "loss": 2.2572, "nll_loss": 0.5153642892837524, "rewards/accuracies": 0.875, "rewards/chosen": -0.13945548236370087, "rewards/margins": 0.03817303106188774, "rewards/rejected": -0.1776285171508789, "step": 588 }, { "epoch": 1.5530652603823336, "grad_norm": 10.221670150756836, "learning_rate": 3.88997338065661e-06, "log_odds_chosen": 0.41200292110443115, "log_odds_ratio": -0.5345177054405212, "logits/chosen": -1.0595688819885254, "logits/rejected": -0.9596129655838013, "logps/chosen": -1.4647784233093262, "logps/rejected": -1.8111674785614014, "loss": 2.3963, "nll_loss": 0.5456159710884094, "rewards/accuracies": 0.625, "rewards/chosen": -0.1464778333902359, "rewards/margins": 0.0346389040350914, "rewards/rejected": -0.1811167299747467, "step": 589 }, { "epoch": 1.5557020435069215, "grad_norm": 10.246232032775879, "learning_rate": 3.882874889086069e-06, "log_odds_chosen": 0.4314562678337097, "log_odds_ratio": -0.5253216028213501, "logits/chosen": -1.1093804836273193, "logits/rejected": -1.0065696239471436, "logps/chosen": -1.4911247491836548, "logps/rejected": -1.8563570976257324, "loss": 2.6862, "nll_loss": 0.6190296411514282, "rewards/accuracies": 0.875, "rewards/chosen": -0.1491124927997589, "rewards/margins": 0.03652321547269821, "rewards/rejected": -0.18563568592071533, "step": 590 }, { "epoch": 1.5583388266315095, "grad_norm": 10.102618217468262, "learning_rate": 3.875776397515528e-06, "log_odds_chosen": 0.533229410648346, "log_odds_ratio": -0.4733944535255432, "logits/chosen": -1.0562225580215454, "logits/rejected": -0.9820581078529358, "logps/chosen": -1.501032829284668, "logps/rejected": -1.9365155696868896, "loss": 2.2097, "nll_loss": 0.5050868391990662, "rewards/accuracies": 0.875, "rewards/chosen": -0.15010327100753784, "rewards/margins": 0.04354827478528023, "rewards/rejected": -0.19365155696868896, "step": 591 }, { "epoch": 1.5609756097560976, "grad_norm": 9.822199821472168, "learning_rate": 3.868677905944986e-06, "log_odds_chosen": 0.15929587185382843, "log_odds_ratio": -0.6341037750244141, "logits/chosen": -1.176448941230774, "logits/rejected": -1.0918364524841309, "logps/chosen": -1.3583776950836182, "logps/rejected": -1.4901213645935059, "loss": 2.5741, "nll_loss": 0.5801116228103638, "rewards/accuracies": 0.5, "rewards/chosen": -0.13583777844905853, "rewards/margins": 0.013174369931221008, "rewards/rejected": -0.14901213347911835, "step": 592 }, { "epoch": 1.5636123928806855, "grad_norm": 9.36978530883789, "learning_rate": 3.861579414374445e-06, "log_odds_chosen": 0.6743534803390503, "log_odds_ratio": -0.42430543899536133, "logits/chosen": -1.0748366117477417, "logits/rejected": -0.9873178005218506, "logps/chosen": -1.2358392477035522, "logps/rejected": -1.743302345275879, "loss": 1.9504, "nll_loss": 0.4451683759689331, "rewards/accuracies": 1.0, "rewards/chosen": -0.12358392775058746, "rewards/margins": 0.05074630305171013, "rewards/rejected": -0.1743302345275879, "step": 593 }, { "epoch": 1.5662491760052735, "grad_norm": 10.640802383422852, "learning_rate": 3.854480922803904e-06, "log_odds_chosen": 0.29617777466773987, "log_odds_ratio": -0.5645712614059448, "logits/chosen": -1.1450952291488647, "logits/rejected": -1.1125893592834473, "logps/chosen": -1.419756293296814, "logps/rejected": -1.653576374053955, "loss": 2.7612, "nll_loss": 0.6338387131690979, "rewards/accuracies": 0.875, "rewards/chosen": -0.14197564125061035, "rewards/margins": 0.023382004350423813, "rewards/rejected": -0.16535764932632446, "step": 594 }, { "epoch": 1.5688859591298616, "grad_norm": 10.288469314575195, "learning_rate": 3.847382431233363e-06, "log_odds_chosen": 0.47418349981307983, "log_odds_ratio": -0.48888635635375977, "logits/chosen": -1.091731309890747, "logits/rejected": -1.0084561109542847, "logps/chosen": -1.4172899723052979, "logps/rejected": -1.795617699623108, "loss": 2.3343, "nll_loss": 0.5346803069114685, "rewards/accuracies": 1.0, "rewards/chosen": -0.14172901213169098, "rewards/margins": 0.037832751870155334, "rewards/rejected": -0.1795617640018463, "step": 595 }, { "epoch": 1.5715227422544495, "grad_norm": 10.708169937133789, "learning_rate": 3.840283939662822e-06, "log_odds_chosen": 0.373151034116745, "log_odds_ratio": -0.5381395816802979, "logits/chosen": -1.1400319337844849, "logits/rejected": -0.9808732867240906, "logps/chosen": -1.4813158512115479, "logps/rejected": -1.7744696140289307, "loss": 2.7531, "nll_loss": 0.634470522403717, "rewards/accuracies": 0.875, "rewards/chosen": -0.1481315791606903, "rewards/margins": 0.02931538037955761, "rewards/rejected": -0.17744696140289307, "step": 596 }, { "epoch": 1.5741595253790375, "grad_norm": 10.199514389038086, "learning_rate": 3.8331854480922806e-06, "log_odds_chosen": 0.514219343662262, "log_odds_ratio": -0.4816381335258484, "logits/chosen": -1.1541380882263184, "logits/rejected": -1.0266348123550415, "logps/chosen": -1.3262834548950195, "logps/rejected": -1.7207541465759277, "loss": 2.3532, "nll_loss": 0.5401394367218018, "rewards/accuracies": 1.0, "rewards/chosen": -0.13262835144996643, "rewards/margins": 0.03944707661867142, "rewards/rejected": -0.17207542061805725, "step": 597 }, { "epoch": 1.5767963085036256, "grad_norm": 9.82446575164795, "learning_rate": 3.8260869565217395e-06, "log_odds_chosen": 0.33211588859558105, "log_odds_ratio": -0.5527073740959167, "logits/chosen": -1.0964913368225098, "logits/rejected": -1.0414520502090454, "logps/chosen": -1.3519885540008545, "logps/rejected": -1.598536729812622, "loss": 2.4916, "nll_loss": 0.5676273107528687, "rewards/accuracies": 0.75, "rewards/chosen": -0.13519886136054993, "rewards/margins": 0.024654816836118698, "rewards/rejected": -0.15985366702079773, "step": 598 }, { "epoch": 1.5794330916282135, "grad_norm": 10.105400085449219, "learning_rate": 3.8189884649511975e-06, "log_odds_chosen": 0.36954590678215027, "log_odds_ratio": -0.5429477691650391, "logits/chosen": -1.1735734939575195, "logits/rejected": -1.0776454210281372, "logps/chosen": -1.401150107383728, "logps/rejected": -1.6895008087158203, "loss": 2.8135, "nll_loss": 0.64908766746521, "rewards/accuracies": 0.875, "rewards/chosen": -0.14011500775814056, "rewards/margins": 0.028835097327828407, "rewards/rejected": -0.16895011067390442, "step": 599 }, { "epoch": 1.5820698747528015, "grad_norm": 8.94379997253418, "learning_rate": 3.811889973380657e-06, "log_odds_chosen": 0.38840794563293457, "log_odds_ratio": -0.5276702642440796, "logits/chosen": -1.0334696769714355, "logits/rejected": -0.9777562618255615, "logps/chosen": -1.1394922733306885, "logps/rejected": -1.4258663654327393, "loss": 1.7502, "nll_loss": 0.38477957248687744, "rewards/accuracies": 1.0, "rewards/chosen": -0.1139492467045784, "rewards/margins": 0.028637398034334183, "rewards/rejected": -0.14258664846420288, "step": 600 }, { "epoch": 1.5847066578773896, "grad_norm": 10.189424514770508, "learning_rate": 3.804791481810115e-06, "log_odds_chosen": 0.5923186540603638, "log_odds_ratio": -0.4600526690483093, "logits/chosen": -1.0624918937683105, "logits/rejected": -0.9810973405838013, "logps/chosen": -1.3630549907684326, "logps/rejected": -1.8275552988052368, "loss": 2.5046, "nll_loss": 0.5801517367362976, "rewards/accuracies": 0.875, "rewards/chosen": -0.13630549609661102, "rewards/margins": 0.04645005241036415, "rewards/rejected": -0.18275552988052368, "step": 601 }, { "epoch": 1.5873434410019776, "grad_norm": 8.882495880126953, "learning_rate": 3.797692990239574e-06, "log_odds_chosen": 0.5100771188735962, "log_odds_ratio": -0.4908636808395386, "logits/chosen": -1.0376583337783813, "logits/rejected": -0.9982375502586365, "logps/chosen": -1.1316465139389038, "logps/rejected": -1.525282621383667, "loss": 1.6229, "nll_loss": 0.35662680864334106, "rewards/accuracies": 0.875, "rewards/chosen": -0.11316464841365814, "rewards/margins": 0.03936360031366348, "rewards/rejected": -0.15252825617790222, "step": 602 }, { "epoch": 1.5899802241265655, "grad_norm": 9.369660377502441, "learning_rate": 3.7905944986690327e-06, "log_odds_chosen": 0.918658435344696, "log_odds_ratio": -0.3525279760360718, "logits/chosen": -1.0274196863174438, "logits/rejected": -0.9446275234222412, "logps/chosen": -1.2396726608276367, "logps/rejected": -1.971005916595459, "loss": 1.6603, "nll_loss": 0.37981170415878296, "rewards/accuracies": 1.0, "rewards/chosen": -0.12396727502346039, "rewards/margins": 0.07313331961631775, "rewards/rejected": -0.19710060954093933, "step": 603 }, { "epoch": 1.5926170072511536, "grad_norm": 10.286942481994629, "learning_rate": 3.783496007098491e-06, "log_odds_chosen": 0.413002073764801, "log_odds_ratio": -0.5234980583190918, "logits/chosen": -1.0057356357574463, "logits/rejected": -1.0041232109069824, "logps/chosen": -1.2989201545715332, "logps/rejected": -1.6251273155212402, "loss": 1.8832, "nll_loss": 0.4184497594833374, "rewards/accuracies": 0.875, "rewards/chosen": -0.1298920214176178, "rewards/margins": 0.03262072056531906, "rewards/rejected": -0.16251273453235626, "step": 604 }, { "epoch": 1.5952537903757416, "grad_norm": 9.780966758728027, "learning_rate": 3.77639751552795e-06, "log_odds_chosen": 0.5441007018089294, "log_odds_ratio": -0.4700562357902527, "logits/chosen": -1.1455645561218262, "logits/rejected": -1.0333596467971802, "logps/chosen": -1.3926197290420532, "logps/rejected": -1.8256686925888062, "loss": 2.4827, "nll_loss": 0.5736803412437439, "rewards/accuracies": 1.0, "rewards/chosen": -0.13926197588443756, "rewards/margins": 0.04330489784479141, "rewards/rejected": -0.18256688117980957, "step": 605 }, { "epoch": 1.5978905735003295, "grad_norm": 9.922493934631348, "learning_rate": 3.769299023957409e-06, "log_odds_chosen": 0.3742057979106903, "log_odds_ratio": -0.5399380922317505, "logits/chosen": -1.1849782466888428, "logits/rejected": -0.9935852885246277, "logps/chosen": -1.4529054164886475, "logps/rejected": -1.7377817630767822, "loss": 2.6458, "nll_loss": 0.6074674725532532, "rewards/accuracies": 0.75, "rewards/chosen": -0.1452905535697937, "rewards/margins": 0.02848763018846512, "rewards/rejected": -0.17377817630767822, "step": 606 }, { "epoch": 1.6005273566249176, "grad_norm": 9.23854923248291, "learning_rate": 3.762200532386868e-06, "log_odds_chosen": 0.41105201840400696, "log_odds_ratio": -0.5460788607597351, "logits/chosen": -1.0429350137710571, "logits/rejected": -0.9836790561676025, "logps/chosen": -1.264663577079773, "logps/rejected": -1.5987305641174316, "loss": 2.1045, "nll_loss": 0.47150543332099915, "rewards/accuracies": 0.75, "rewards/chosen": -0.12646636366844177, "rewards/margins": 0.03340669721364975, "rewards/rejected": -0.15987306833267212, "step": 607 }, { "epoch": 1.6031641397495056, "grad_norm": 10.25296688079834, "learning_rate": 3.7551020408163264e-06, "log_odds_chosen": 0.4063701331615448, "log_odds_ratio": -0.5314669013023376, "logits/chosen": -1.085684061050415, "logits/rejected": -0.9549438953399658, "logps/chosen": -1.543460726737976, "logps/rejected": -1.8915095329284668, "loss": 2.8871, "nll_loss": 0.6686206459999084, "rewards/accuracies": 0.75, "rewards/chosen": -0.15434607863426208, "rewards/margins": 0.03480488061904907, "rewards/rejected": -0.18915095925331116, "step": 608 }, { "epoch": 1.6058009228740935, "grad_norm": 10.205924987792969, "learning_rate": 3.748003549245785e-06, "log_odds_chosen": 0.4665653109550476, "log_odds_ratio": -0.515164852142334, "logits/chosen": -1.05836021900177, "logits/rejected": -1.031651258468628, "logps/chosen": -1.393672227859497, "logps/rejected": -1.7077929973602295, "loss": 2.4482, "nll_loss": 0.5605322122573853, "rewards/accuracies": 0.875, "rewards/chosen": -0.1393672227859497, "rewards/margins": 0.03141207620501518, "rewards/rejected": -0.170779287815094, "step": 609 }, { "epoch": 1.6084377059986816, "grad_norm": 9.898493766784668, "learning_rate": 3.7409050576752437e-06, "log_odds_chosen": 0.41122180223464966, "log_odds_ratio": -0.514970600605011, "logits/chosen": -1.1111037731170654, "logits/rejected": -1.0361148118972778, "logps/chosen": -1.3642151355743408, "logps/rejected": -1.6770347356796265, "loss": 2.3191, "nll_loss": 0.528271496295929, "rewards/accuracies": 1.0, "rewards/chosen": -0.13642151653766632, "rewards/margins": 0.031281955540180206, "rewards/rejected": -0.16770347952842712, "step": 610 }, { "epoch": 1.6110744891232696, "grad_norm": 9.282876968383789, "learning_rate": 3.7338065661047026e-06, "log_odds_chosen": 0.6254175901412964, "log_odds_ratio": -0.43851619958877563, "logits/chosen": -1.1438246965408325, "logits/rejected": -1.0386680364608765, "logps/chosen": -1.3083348274230957, "logps/rejected": -1.8028080463409424, "loss": 2.2901, "nll_loss": 0.5286656618118286, "rewards/accuracies": 1.0, "rewards/chosen": -0.1308334767818451, "rewards/margins": 0.04944733530282974, "rewards/rejected": -0.18028083443641663, "step": 611 }, { "epoch": 1.6137112722478575, "grad_norm": 10.03630542755127, "learning_rate": 3.726708074534161e-06, "log_odds_chosen": 0.7499800324440002, "log_odds_ratio": -0.44167301058769226, "logits/chosen": -1.148216724395752, "logits/rejected": -1.0289642810821533, "logps/chosen": -1.3351669311523438, "logps/rejected": -1.9622267484664917, "loss": 2.2255, "nll_loss": 0.5122069120407104, "rewards/accuracies": 1.0, "rewards/chosen": -0.13351669907569885, "rewards/margins": 0.06270598620176315, "rewards/rejected": -0.1962226778268814, "step": 612 }, { "epoch": 1.6163480553724456, "grad_norm": 10.207561492919922, "learning_rate": 3.71960958296362e-06, "log_odds_chosen": 0.5388544797897339, "log_odds_ratio": -0.46620678901672363, "logits/chosen": -1.0826255083084106, "logits/rejected": -1.0181787014007568, "logps/chosen": -1.3552072048187256, "logps/rejected": -1.7654448747634888, "loss": 2.3471, "nll_loss": 0.5401521325111389, "rewards/accuracies": 1.0, "rewards/chosen": -0.13552072644233704, "rewards/margins": 0.041023775935173035, "rewards/rejected": -0.17654448747634888, "step": 613 }, { "epoch": 1.6189848384970338, "grad_norm": 9.864588737487793, "learning_rate": 3.712511091393079e-06, "log_odds_chosen": 0.5364480018615723, "log_odds_ratio": -0.4759971797466278, "logits/chosen": -1.0452721118927002, "logits/rejected": -0.9847467541694641, "logps/chosen": -1.3223601579666138, "logps/rejected": -1.7249159812927246, "loss": 2.3345, "nll_loss": 0.5360279083251953, "rewards/accuracies": 0.75, "rewards/chosen": -0.13223600387573242, "rewards/margins": 0.0402555987238884, "rewards/rejected": -0.17249161005020142, "step": 614 }, { "epoch": 1.6216216216216215, "grad_norm": 10.773433685302734, "learning_rate": 3.705412599822538e-06, "log_odds_chosen": 0.2956734895706177, "log_odds_ratio": -0.5695884823799133, "logits/chosen": -1.180643081665039, "logits/rejected": -1.0363719463348389, "logps/chosen": -1.4985196590423584, "logps/rejected": -1.7395797967910767, "loss": 3.0056, "nll_loss": 0.6944411396980286, "rewards/accuracies": 0.875, "rewards/chosen": -0.1498519778251648, "rewards/margins": 0.024106010794639587, "rewards/rejected": -0.1739579737186432, "step": 615 }, { "epoch": 1.6242584047462096, "grad_norm": 9.767648696899414, "learning_rate": 3.6983141082519963e-06, "log_odds_chosen": 0.5799336433410645, "log_odds_ratio": -0.5387483835220337, "logits/chosen": -1.058786153793335, "logits/rejected": -0.9933582544326782, "logps/chosen": -1.226000428199768, "logps/rejected": -1.7379118204116821, "loss": 1.9045, "nll_loss": 0.42225104570388794, "rewards/accuracies": 0.875, "rewards/chosen": -0.12260004132986069, "rewards/margins": 0.05119115114212036, "rewards/rejected": -0.17379119992256165, "step": 616 }, { "epoch": 1.6268951878707978, "grad_norm": 9.380396842956543, "learning_rate": 3.691215616681455e-06, "log_odds_chosen": 0.7051491737365723, "log_odds_ratio": -0.43674933910369873, "logits/chosen": -1.0409693717956543, "logits/rejected": -0.9513552188873291, "logps/chosen": -1.3766884803771973, "logps/rejected": -1.9633386135101318, "loss": 1.7689, "nll_loss": 0.3985414505004883, "rewards/accuracies": 0.875, "rewards/chosen": -0.13766884803771973, "rewards/margins": 0.058665018528699875, "rewards/rejected": -0.1963338702917099, "step": 617 }, { "epoch": 1.6295319709953855, "grad_norm": 10.475168228149414, "learning_rate": 3.6841171251109137e-06, "log_odds_chosen": 0.5606287717819214, "log_odds_ratio": -0.4687998294830322, "logits/chosen": -1.099635124206543, "logits/rejected": -1.0176622867584229, "logps/chosen": -1.3984782695770264, "logps/rejected": -1.842691421508789, "loss": 2.3243, "nll_loss": 0.5342031121253967, "rewards/accuracies": 1.0, "rewards/chosen": -0.13984781503677368, "rewards/margins": 0.044421326369047165, "rewards/rejected": -0.18426913022994995, "step": 618 }, { "epoch": 1.6321687541199736, "grad_norm": 11.261204719543457, "learning_rate": 3.677018633540372e-06, "log_odds_chosen": 0.3890281617641449, "log_odds_ratio": -0.5289594531059265, "logits/chosen": -1.129044771194458, "logits/rejected": -1.0024116039276123, "logps/chosen": -1.541863203048706, "logps/rejected": -1.8613746166229248, "loss": 2.7641, "nll_loss": 0.638132631778717, "rewards/accuracies": 0.875, "rewards/chosen": -0.15418632328510284, "rewards/margins": 0.03195112943649292, "rewards/rejected": -0.18613745272159576, "step": 619 }, { "epoch": 1.6348055372445618, "grad_norm": 10.402582168579102, "learning_rate": 3.669920141969831e-06, "log_odds_chosen": 0.18877491354942322, "log_odds_ratio": -0.604672908782959, "logits/chosen": -1.2087739706039429, "logits/rejected": -1.108641505241394, "logps/chosen": -1.398322582244873, "logps/rejected": -1.5473822355270386, "loss": 2.986, "nll_loss": 0.6860237717628479, "rewards/accuracies": 1.0, "rewards/chosen": -0.1398322582244873, "rewards/margins": 0.01490597240626812, "rewards/rejected": -0.15473823249340057, "step": 620 }, { "epoch": 1.6374423203691495, "grad_norm": 11.109515190124512, "learning_rate": 3.66282165039929e-06, "log_odds_chosen": 0.43903154134750366, "log_odds_ratio": -0.5070865750312805, "logits/chosen": -1.1722849607467651, "logits/rejected": -1.0782361030578613, "logps/chosen": -1.3067762851715088, "logps/rejected": -1.6321287155151367, "loss": 2.6851, "nll_loss": 0.620557427406311, "rewards/accuracies": 1.0, "rewards/chosen": -0.13067764043807983, "rewards/margins": 0.03253525123000145, "rewards/rejected": -0.16321289539337158, "step": 621 }, { "epoch": 1.6400791034937376, "grad_norm": 11.161482810974121, "learning_rate": 3.655723158828749e-06, "log_odds_chosen": 0.3513215184211731, "log_odds_ratio": -0.5436182022094727, "logits/chosen": -1.08698308467865, "logits/rejected": -1.0150874853134155, "logps/chosen": -1.435055136680603, "logps/rejected": -1.711066722869873, "loss": 3.0174, "nll_loss": 0.699986457824707, "rewards/accuracies": 0.875, "rewards/chosen": -0.1435055136680603, "rewards/margins": 0.027601167559623718, "rewards/rejected": -0.17110668122768402, "step": 622 }, { "epoch": 1.6427158866183258, "grad_norm": 10.726851463317871, "learning_rate": 3.6486246672582073e-06, "log_odds_chosen": 0.6032743453979492, "log_odds_ratio": -0.4820668697357178, "logits/chosen": -1.1473723649978638, "logits/rejected": -0.9986626505851746, "logps/chosen": -1.5315998792648315, "logps/rejected": -2.037564754486084, "loss": 2.7735, "nll_loss": 0.6451709270477295, "rewards/accuracies": 0.875, "rewards/chosen": -0.1531599760055542, "rewards/margins": 0.050596512854099274, "rewards/rejected": -0.20375649631023407, "step": 623 }, { "epoch": 1.6453526697429135, "grad_norm": 10.222211837768555, "learning_rate": 3.641526175687666e-06, "log_odds_chosen": 0.6348525881767273, "log_odds_ratio": -0.4367481768131256, "logits/chosen": -1.122359037399292, "logits/rejected": -1.0492609739303589, "logps/chosen": -1.326279878616333, "logps/rejected": -1.822200059890747, "loss": 2.4124, "nll_loss": 0.559432864189148, "rewards/accuracies": 1.0, "rewards/chosen": -0.13262799382209778, "rewards/margins": 0.04959201067686081, "rewards/rejected": -0.182219997048378, "step": 624 }, { "epoch": 1.6479894528675016, "grad_norm": 9.407581329345703, "learning_rate": 3.634427684117125e-06, "log_odds_chosen": 0.495173841714859, "log_odds_ratio": -0.4865230321884155, "logits/chosen": -1.130155324935913, "logits/rejected": -1.0504111051559448, "logps/chosen": -1.3680860996246338, "logps/rejected": -1.7636840343475342, "loss": 2.2616, "nll_loss": 0.5167529582977295, "rewards/accuracies": 1.0, "rewards/chosen": -0.1368086189031601, "rewards/margins": 0.03955978527665138, "rewards/rejected": -0.17636841535568237, "step": 625 }, { "epoch": 1.6506262359920898, "grad_norm": 10.777320861816406, "learning_rate": 3.627329192546584e-06, "log_odds_chosen": 0.5994431376457214, "log_odds_ratio": -0.45564326643943787, "logits/chosen": -1.0558720827102661, "logits/rejected": -0.9743368029594421, "logps/chosen": -1.4363338947296143, "logps/rejected": -1.9244080781936646, "loss": 2.1948, "nll_loss": 0.5031373500823975, "rewards/accuracies": 1.0, "rewards/chosen": -0.1436333954334259, "rewards/margins": 0.04880741238594055, "rewards/rejected": -0.19244080781936646, "step": 626 }, { "epoch": 1.6532630191166775, "grad_norm": 9.53264331817627, "learning_rate": 3.620230700976042e-06, "log_odds_chosen": 0.46930187940597534, "log_odds_ratio": -0.4994354844093323, "logits/chosen": -1.1431329250335693, "logits/rejected": -1.043256402015686, "logps/chosen": -1.3216546773910522, "logps/rejected": -1.6822595596313477, "loss": 2.3107, "nll_loss": 0.5277235507965088, "rewards/accuracies": 1.0, "rewards/chosen": -0.13216547667980194, "rewards/margins": 0.03606047108769417, "rewards/rejected": -0.1682259440422058, "step": 627 }, { "epoch": 1.6558998022412657, "grad_norm": 10.413905143737793, "learning_rate": 3.613132209405501e-06, "log_odds_chosen": 0.37333759665489197, "log_odds_ratio": -0.5379760265350342, "logits/chosen": -1.1401424407958984, "logits/rejected": -1.060665249824524, "logps/chosen": -1.4060555696487427, "logps/rejected": -1.7031787633895874, "loss": 2.4641, "nll_loss": 0.5622209310531616, "rewards/accuracies": 0.75, "rewards/chosen": -0.14060556888580322, "rewards/margins": 0.02971232309937477, "rewards/rejected": -0.1703178882598877, "step": 628 }, { "epoch": 1.6585365853658538, "grad_norm": 9.83200454711914, "learning_rate": 3.60603371783496e-06, "log_odds_chosen": 0.6011961698532104, "log_odds_ratio": -0.449532151222229, "logits/chosen": -1.0809606313705444, "logits/rejected": -0.9807281494140625, "logps/chosen": -1.0397064685821533, "logps/rejected": -1.4554616212844849, "loss": 1.9358, "nll_loss": 0.43898892402648926, "rewards/accuracies": 1.0, "rewards/chosen": -0.10397064685821533, "rewards/margins": 0.04157552123069763, "rewards/rejected": -0.14554616808891296, "step": 629 }, { "epoch": 1.6611733684904415, "grad_norm": 10.28779125213623, "learning_rate": 3.5989352262644188e-06, "log_odds_chosen": 0.6239845156669617, "log_odds_ratio": -0.47742146253585815, "logits/chosen": -1.1649227142333984, "logits/rejected": -1.0624198913574219, "logps/chosen": -1.4135284423828125, "logps/rejected": -1.8926094770431519, "loss": 2.7276, "nll_loss": 0.634151816368103, "rewards/accuracies": 0.875, "rewards/chosen": -0.14135286211967468, "rewards/margins": 0.04790811240673065, "rewards/rejected": -0.18926095962524414, "step": 630 }, { "epoch": 1.6638101516150297, "grad_norm": 9.883668899536133, "learning_rate": 3.5918367346938772e-06, "log_odds_chosen": 0.6382372379302979, "log_odds_ratio": -0.4400421679019928, "logits/chosen": -1.172858715057373, "logits/rejected": -1.0197105407714844, "logps/chosen": -1.3074352741241455, "logps/rejected": -1.8067971467971802, "loss": 2.3467, "nll_loss": 0.5426616668701172, "rewards/accuracies": 1.0, "rewards/chosen": -0.13074351847171783, "rewards/margins": 0.0499361976981163, "rewards/rejected": -0.18067970871925354, "step": 631 }, { "epoch": 1.6664469347396178, "grad_norm": 9.418743133544922, "learning_rate": 3.584738243123336e-06, "log_odds_chosen": 0.840705156326294, "log_odds_ratio": -0.3766203820705414, "logits/chosen": -1.1018195152282715, "logits/rejected": -0.9619787335395813, "logps/chosen": -1.2872685194015503, "logps/rejected": -1.9614143371582031, "loss": 2.0948, "nll_loss": 0.4860471487045288, "rewards/accuracies": 1.0, "rewards/chosen": -0.12872685492038727, "rewards/margins": 0.06741458177566528, "rewards/rejected": -0.19614143669605255, "step": 632 }, { "epoch": 1.6690837178642055, "grad_norm": 10.141168594360352, "learning_rate": 3.577639751552795e-06, "log_odds_chosen": 0.45387399196624756, "log_odds_ratio": -0.507139265537262, "logits/chosen": -1.111092448234558, "logits/rejected": -1.0183961391448975, "logps/chosen": -1.3733915090560913, "logps/rejected": -1.7340645790100098, "loss": 2.2225, "nll_loss": 0.5049020051956177, "rewards/accuracies": 1.0, "rewards/chosen": -0.13733914494514465, "rewards/margins": 0.03606731444597244, "rewards/rejected": -0.1734064519405365, "step": 633 }, { "epoch": 1.6717205009887937, "grad_norm": 9.883380889892578, "learning_rate": 3.570541259982254e-06, "log_odds_chosen": 0.5376417636871338, "log_odds_ratio": -0.4772071838378906, "logits/chosen": -1.1129980087280273, "logits/rejected": -1.0229228734970093, "logps/chosen": -1.2614349126815796, "logps/rejected": -1.6619253158569336, "loss": 2.5366, "nll_loss": 0.5864204168319702, "rewards/accuracies": 1.0, "rewards/chosen": -0.12614348530769348, "rewards/margins": 0.04004904255270958, "rewards/rejected": -0.16619253158569336, "step": 634 }, { "epoch": 1.6743572841133818, "grad_norm": 10.11703872680664, "learning_rate": 3.5634427684117124e-06, "log_odds_chosen": 0.414914071559906, "log_odds_ratio": -0.5368670225143433, "logits/chosen": -1.1180551052093506, "logits/rejected": -1.0814414024353027, "logps/chosen": -1.423435926437378, "logps/rejected": -1.7301952838897705, "loss": 2.6061, "nll_loss": 0.5978296399116516, "rewards/accuracies": 0.875, "rewards/chosen": -0.14234361052513123, "rewards/margins": 0.03067592903971672, "rewards/rejected": -0.17301952838897705, "step": 635 }, { "epoch": 1.6769940672379697, "grad_norm": 10.515578269958496, "learning_rate": 3.556344276841171e-06, "log_odds_chosen": 0.5427382588386536, "log_odds_ratio": -0.4689965546131134, "logits/chosen": -1.1191020011901855, "logits/rejected": -0.9840205907821655, "logps/chosen": -1.515913724899292, "logps/rejected": -1.9586641788482666, "loss": 2.5874, "nll_loss": 0.5999395251274109, "rewards/accuracies": 0.875, "rewards/chosen": -0.15159136056900024, "rewards/margins": 0.044275060296058655, "rewards/rejected": -0.1958664357662201, "step": 636 }, { "epoch": 1.6796308503625577, "grad_norm": 9.867798805236816, "learning_rate": 3.5492457852706298e-06, "log_odds_chosen": 0.37667518854141235, "log_odds_ratio": -0.5419520735740662, "logits/chosen": -1.0526944398880005, "logits/rejected": -1.015201210975647, "logps/chosen": -1.3632912635803223, "logps/rejected": -1.6562535762786865, "loss": 2.1737, "nll_loss": 0.48923933506011963, "rewards/accuracies": 0.75, "rewards/chosen": -0.13632912933826447, "rewards/margins": 0.02929622307419777, "rewards/rejected": -0.16562534868717194, "step": 637 }, { "epoch": 1.6822676334871458, "grad_norm": 9.446643829345703, "learning_rate": 3.5421472937000883e-06, "log_odds_chosen": 0.6284515857696533, "log_odds_ratio": -0.45431938767433167, "logits/chosen": -1.0685917139053345, "logits/rejected": -1.0092133283615112, "logps/chosen": -1.367841124534607, "logps/rejected": -1.8739535808563232, "loss": 2.0596, "nll_loss": 0.4694611132144928, "rewards/accuracies": 1.0, "rewards/chosen": -0.1367841213941574, "rewards/margins": 0.050611257553100586, "rewards/rejected": -0.187395378947258, "step": 638 }, { "epoch": 1.6849044166117337, "grad_norm": 10.240177154541016, "learning_rate": 3.535048802129547e-06, "log_odds_chosen": 0.539044976234436, "log_odds_ratio": -0.5708103775978088, "logits/chosen": -1.1326308250427246, "logits/rejected": -1.0996580123901367, "logps/chosen": -1.348641037940979, "logps/rejected": -1.8032280206680298, "loss": 2.7263, "nll_loss": 0.6244887113571167, "rewards/accuracies": 0.875, "rewards/chosen": -0.13486409187316895, "rewards/margins": 0.04545869678258896, "rewards/rejected": -0.1803227961063385, "step": 639 }, { "epoch": 1.6875411997363217, "grad_norm": 10.046351432800293, "learning_rate": 3.527950310559006e-06, "log_odds_chosen": 0.47341519594192505, "log_odds_ratio": -0.5111666321754456, "logits/chosen": -1.1026053428649902, "logits/rejected": -1.0265198945999146, "logps/chosen": -1.374668002128601, "logps/rejected": -1.7360124588012695, "loss": 2.3947, "nll_loss": 0.5475689172744751, "rewards/accuracies": 0.75, "rewards/chosen": -0.13746680319309235, "rewards/margins": 0.036134447902441025, "rewards/rejected": -0.17360125482082367, "step": 640 }, { "epoch": 1.6901779828609098, "grad_norm": 10.694351196289062, "learning_rate": 3.520851818988465e-06, "log_odds_chosen": 0.41393715143203735, "log_odds_ratio": -0.5214405059814453, "logits/chosen": -1.1589573621749878, "logits/rejected": -1.0569067001342773, "logps/chosen": -1.2749884128570557, "logps/rejected": -1.5959004163742065, "loss": 2.509, "nll_loss": 0.575117826461792, "rewards/accuracies": 1.0, "rewards/chosen": -0.12749885022640228, "rewards/margins": 0.03209120035171509, "rewards/rejected": -0.15959003567695618, "step": 641 }, { "epoch": 1.6928147659854977, "grad_norm": 10.566948890686035, "learning_rate": 3.5137533274179234e-06, "log_odds_chosen": 0.6312991380691528, "log_odds_ratio": -0.45921069383621216, "logits/chosen": -1.1022566556930542, "logits/rejected": -1.0211251974105835, "logps/chosen": -1.4295891523361206, "logps/rejected": -1.9132863283157349, "loss": 2.6099, "nll_loss": 0.6065584421157837, "rewards/accuracies": 1.0, "rewards/chosen": -0.14295890927314758, "rewards/margins": 0.04836973175406456, "rewards/rejected": -0.19132864475250244, "step": 642 }, { "epoch": 1.6954515491100857, "grad_norm": 9.972731590270996, "learning_rate": 3.5066548358473823e-06, "log_odds_chosen": 0.48382002115249634, "log_odds_ratio": -0.49584776163101196, "logits/chosen": -1.0859644412994385, "logits/rejected": -0.9947874546051025, "logps/chosen": -1.409611701965332, "logps/rejected": -1.791120171546936, "loss": 2.2081, "nll_loss": 0.502432644367218, "rewards/accuracies": 0.75, "rewards/chosen": -0.1409611701965332, "rewards/margins": 0.038150854408741, "rewards/rejected": -0.1791120171546936, "step": 643 }, { "epoch": 1.6980883322346738, "grad_norm": 9.329258918762207, "learning_rate": 3.4995563442768412e-06, "log_odds_chosen": 0.4627387821674347, "log_odds_ratio": -0.4984897971153259, "logits/chosen": -1.1354320049285889, "logits/rejected": -1.0707372426986694, "logps/chosen": -1.1951978206634521, "logps/rejected": -1.5313777923583984, "loss": 2.1396, "nll_loss": 0.4850457012653351, "rewards/accuracies": 1.0, "rewards/chosen": -0.11951977759599686, "rewards/margins": 0.03361799567937851, "rewards/rejected": -0.15313777327537537, "step": 644 }, { "epoch": 1.7007251153592617, "grad_norm": 10.280769348144531, "learning_rate": 3.4924578527062997e-06, "log_odds_chosen": 0.6402459144592285, "log_odds_ratio": -0.4437893331050873, "logits/chosen": -1.133689522743225, "logits/rejected": -1.0183706283569336, "logps/chosen": -1.5126464366912842, "logps/rejected": -2.0405571460723877, "loss": 2.5914, "nll_loss": 0.6034782528877258, "rewards/accuracies": 1.0, "rewards/chosen": -0.15126465260982513, "rewards/margins": 0.05279106646776199, "rewards/rejected": -0.20405571162700653, "step": 645 }, { "epoch": 1.7033618984838497, "grad_norm": 10.134493827819824, "learning_rate": 3.485359361135758e-06, "log_odds_chosen": 0.3840335011482239, "log_odds_ratio": -0.5229801535606384, "logits/chosen": -1.0866787433624268, "logits/rejected": -1.0402109622955322, "logps/chosen": -1.4302546977996826, "logps/rejected": -1.7201024293899536, "loss": 2.6237, "nll_loss": 0.6036279201507568, "rewards/accuracies": 1.0, "rewards/chosen": -0.1430254727602005, "rewards/margins": 0.02898477017879486, "rewards/rejected": -0.17201025784015656, "step": 646 }, { "epoch": 1.7059986816084378, "grad_norm": 10.103415489196777, "learning_rate": 3.478260869565217e-06, "log_odds_chosen": 0.4003719091415405, "log_odds_ratio": -0.5274068713188171, "logits/chosen": -1.156123161315918, "logits/rejected": -1.0898571014404297, "logps/chosen": -1.3229293823242188, "logps/rejected": -1.638416051864624, "loss": 2.4792, "nll_loss": 0.5670531988143921, "rewards/accuracies": 1.0, "rewards/chosen": -0.1322929412126541, "rewards/margins": 0.031548675149679184, "rewards/rejected": -0.1638416200876236, "step": 647 }, { "epoch": 1.7086354647330257, "grad_norm": 10.304222106933594, "learning_rate": 3.471162377994676e-06, "log_odds_chosen": 0.3370283842086792, "log_odds_ratio": -0.5651724338531494, "logits/chosen": -1.086463212966919, "logits/rejected": -1.0409562587738037, "logps/chosen": -1.2855072021484375, "logps/rejected": -1.5236806869506836, "loss": 2.0769, "nll_loss": 0.46271374821662903, "rewards/accuracies": 0.625, "rewards/chosen": -0.1285507082939148, "rewards/margins": 0.023817352950572968, "rewards/rejected": -0.15236806869506836, "step": 648 }, { "epoch": 1.7112722478576137, "grad_norm": 9.503119468688965, "learning_rate": 3.464063886424135e-06, "log_odds_chosen": 0.6528380513191223, "log_odds_ratio": -0.44192853569984436, "logits/chosen": -1.104736328125, "logits/rejected": -1.0059311389923096, "logps/chosen": -1.231282114982605, "logps/rejected": -1.757354497909546, "loss": 2.0768, "nll_loss": 0.4750056862831116, "rewards/accuracies": 0.875, "rewards/chosen": -0.12312820553779602, "rewards/margins": 0.05260723829269409, "rewards/rejected": -0.1757354587316513, "step": 649 }, { "epoch": 1.7139090309822018, "grad_norm": 10.083404541015625, "learning_rate": 3.4569653948535934e-06, "log_odds_chosen": 0.5733547806739807, "log_odds_ratio": -0.46470165252685547, "logits/chosen": -1.1071587800979614, "logits/rejected": -1.0430916547775269, "logps/chosen": -1.440490484237671, "logps/rejected": -1.9038130044937134, "loss": 2.3848, "nll_loss": 0.5497271418571472, "rewards/accuracies": 1.0, "rewards/chosen": -0.1440490484237671, "rewards/margins": 0.04633225500583649, "rewards/rejected": -0.19038130342960358, "step": 650 }, { "epoch": 1.7165458141067897, "grad_norm": 10.582185745239258, "learning_rate": 3.4498669032830523e-06, "log_odds_chosen": 0.26057717204093933, "log_odds_ratio": -0.5819696187973022, "logits/chosen": -1.1635925769805908, "logits/rejected": -1.1211692094802856, "logps/chosen": -1.4908151626586914, "logps/rejected": -1.7052472829818726, "loss": 3.023, "nll_loss": 0.6975415945053101, "rewards/accuracies": 0.875, "rewards/chosen": -0.1490815281867981, "rewards/margins": 0.021443195641040802, "rewards/rejected": -0.1705247163772583, "step": 651 }, { "epoch": 1.7191825972313777, "grad_norm": 10.465653419494629, "learning_rate": 3.442768411712511e-06, "log_odds_chosen": 0.50111985206604, "log_odds_ratio": -0.4852719306945801, "logits/chosen": -1.1733665466308594, "logits/rejected": -1.0618579387664795, "logps/chosen": -1.5112289190292358, "logps/rejected": -1.9103684425354004, "loss": 2.8812, "nll_loss": 0.6717795133590698, "rewards/accuracies": 1.0, "rewards/chosen": -0.15112288296222687, "rewards/margins": 0.03991395980119705, "rewards/rejected": -0.19103685021400452, "step": 652 }, { "epoch": 1.7218193803559658, "grad_norm": 9.639845848083496, "learning_rate": 3.43566992014197e-06, "log_odds_chosen": 0.46708834171295166, "log_odds_ratio": -0.508224606513977, "logits/chosen": -1.155930519104004, "logits/rejected": -1.038315773010254, "logps/chosen": -1.2967246770858765, "logps/rejected": -1.6591012477874756, "loss": 2.3088, "nll_loss": 0.5263881683349609, "rewards/accuracies": 1.0, "rewards/chosen": -0.12967246770858765, "rewards/margins": 0.036237671971321106, "rewards/rejected": -0.16591013967990875, "step": 653 }, { "epoch": 1.7244561634805537, "grad_norm": 10.27575969696045, "learning_rate": 3.428571428571428e-06, "log_odds_chosen": 0.40247106552124023, "log_odds_ratio": -0.5232669115066528, "logits/chosen": -1.174773931503296, "logits/rejected": -1.116478681564331, "logps/chosen": -1.394790530204773, "logps/rejected": -1.696317195892334, "loss": 2.5231, "nll_loss": 0.5784451961517334, "rewards/accuracies": 1.0, "rewards/chosen": -0.13947907090187073, "rewards/margins": 0.030152656137943268, "rewards/rejected": -0.1696317195892334, "step": 654 }, { "epoch": 1.7270929466051417, "grad_norm": 8.808902740478516, "learning_rate": 3.421472937000887e-06, "log_odds_chosen": 0.7139609456062317, "log_odds_ratio": -0.4108840823173523, "logits/chosen": -1.1678225994110107, "logits/rejected": -1.0610462427139282, "logps/chosen": -1.0931427478790283, "logps/rejected": -1.6255385875701904, "loss": 1.903, "nll_loss": 0.43466371297836304, "rewards/accuracies": 1.0, "rewards/chosen": -0.10931427776813507, "rewards/margins": 0.05323958396911621, "rewards/rejected": -0.16255386173725128, "step": 655 }, { "epoch": 1.7297297297297298, "grad_norm": 9.192036628723145, "learning_rate": 3.414374445430346e-06, "log_odds_chosen": 0.4236711859703064, "log_odds_ratio": -0.5087260603904724, "logits/chosen": -1.1298270225524902, "logits/rejected": -1.0678911209106445, "logps/chosen": -1.243565559387207, "logps/rejected": -1.5648874044418335, "loss": 1.9972, "nll_loss": 0.448428213596344, "rewards/accuracies": 1.0, "rewards/chosen": -0.12435655295848846, "rewards/margins": 0.032132189720869064, "rewards/rejected": -0.15648874640464783, "step": 656 }, { "epoch": 1.7323665128543178, "grad_norm": 10.163776397705078, "learning_rate": 3.4072759538598044e-06, "log_odds_chosen": 0.44237345457077026, "log_odds_ratio": -0.5055872201919556, "logits/chosen": -1.174792766571045, "logits/rejected": -0.9949660301208496, "logps/chosen": -1.4597645998001099, "logps/rejected": -1.805436134338379, "loss": 2.5862, "nll_loss": 0.5959893465042114, "rewards/accuracies": 0.875, "rewards/chosen": -0.1459764540195465, "rewards/margins": 0.034567151218652725, "rewards/rejected": -0.18054361641407013, "step": 657 }, { "epoch": 1.7350032959789057, "grad_norm": 10.466496467590332, "learning_rate": 3.4001774622892633e-06, "log_odds_chosen": 0.3946349024772644, "log_odds_ratio": -0.5299810767173767, "logits/chosen": -1.0455631017684937, "logits/rejected": -1.0237305164337158, "logps/chosen": -1.3399708271026611, "logps/rejected": -1.635288953781128, "loss": 2.9692, "nll_loss": 0.6893136501312256, "rewards/accuracies": 0.75, "rewards/chosen": -0.13399706780910492, "rewards/margins": 0.029531806707382202, "rewards/rejected": -0.16352888941764832, "step": 658 }, { "epoch": 1.7376400791034938, "grad_norm": 10.198365211486816, "learning_rate": 3.393078970718722e-06, "log_odds_chosen": 0.13144664466381073, "log_odds_ratio": -0.6374571323394775, "logits/chosen": -1.0861074924468994, "logits/rejected": -1.044995665550232, "logps/chosen": -1.3439302444458008, "logps/rejected": -1.4365148544311523, "loss": 2.4502, "nll_loss": 0.5487998723983765, "rewards/accuracies": 0.75, "rewards/chosen": -0.13439303636550903, "rewards/margins": 0.009258460253477097, "rewards/rejected": -0.14365148544311523, "step": 659 }, { "epoch": 1.7402768622280818, "grad_norm": 10.744078636169434, "learning_rate": 3.385980479148181e-06, "log_odds_chosen": 0.38164791464805603, "log_odds_ratio": -0.5427396893501282, "logits/chosen": -1.1259647607803345, "logits/rejected": -1.0375760793685913, "logps/chosen": -1.4524257183074951, "logps/rejected": -1.7625154256820679, "loss": 2.9398, "nll_loss": 0.6806696653366089, "rewards/accuracies": 0.75, "rewards/chosen": -0.1452425718307495, "rewards/margins": 0.031008977442979813, "rewards/rejected": -0.17625154554843903, "step": 660 }, { "epoch": 1.7429136453526697, "grad_norm": 10.451824188232422, "learning_rate": 3.3788819875776396e-06, "log_odds_chosen": 0.4451256990432739, "log_odds_ratio": -0.5080350637435913, "logits/chosen": -1.1427664756774902, "logits/rejected": -1.0376893281936646, "logps/chosen": -1.5254104137420654, "logps/rejected": -1.8743579387664795, "loss": 2.5769, "nll_loss": 0.5934144258499146, "rewards/accuracies": 0.875, "rewards/chosen": -0.15254104137420654, "rewards/margins": 0.03489474207162857, "rewards/rejected": -0.1874357908964157, "step": 661 }, { "epoch": 1.7455504284772578, "grad_norm": 10.607963562011719, "learning_rate": 3.3717834960070985e-06, "log_odds_chosen": 0.363365113735199, "log_odds_ratio": -0.5402311682701111, "logits/chosen": -1.1337553262710571, "logits/rejected": -1.0186294317245483, "logps/chosen": -1.41310453414917, "logps/rejected": -1.7071406841278076, "loss": 2.5018, "nll_loss": 0.5714316964149475, "rewards/accuracies": 0.875, "rewards/chosen": -0.141310453414917, "rewards/margins": 0.029403626918792725, "rewards/rejected": -0.17071406543254852, "step": 662 }, { "epoch": 1.7481872116018458, "grad_norm": 9.786578178405762, "learning_rate": 3.364685004436557e-06, "log_odds_chosen": 0.6023552417755127, "log_odds_ratio": -0.4504116177558899, "logits/chosen": -1.1151041984558105, "logits/rejected": -1.0372217893600464, "logps/chosen": -1.2382469177246094, "logps/rejected": -1.697322964668274, "loss": 2.2844, "nll_loss": 0.5260693430900574, "rewards/accuracies": 1.0, "rewards/chosen": -0.12382469326257706, "rewards/margins": 0.04590759426355362, "rewards/rejected": -0.16973230242729187, "step": 663 }, { "epoch": 1.7508239947264337, "grad_norm": 9.943042755126953, "learning_rate": 3.357586512866016e-06, "log_odds_chosen": 0.5671270489692688, "log_odds_ratio": -0.46319738030433655, "logits/chosen": -1.1743613481521606, "logits/rejected": -1.0710899829864502, "logps/chosen": -1.3550426959991455, "logps/rejected": -1.797658085823059, "loss": 2.4681, "nll_loss": 0.5707059502601624, "rewards/accuracies": 1.0, "rewards/chosen": -0.13550427556037903, "rewards/margins": 0.04426154866814613, "rewards/rejected": -0.17976582050323486, "step": 664 }, { "epoch": 1.7534607778510218, "grad_norm": 9.573928833007812, "learning_rate": 3.3504880212954743e-06, "log_odds_chosen": 0.34388411045074463, "log_odds_ratio": -0.5426332950592041, "logits/chosen": -1.1098031997680664, "logits/rejected": -1.0429176092147827, "logps/chosen": -1.3713771104812622, "logps/rejected": -1.6443697214126587, "loss": 2.3063, "nll_loss": 0.5223134160041809, "rewards/accuracies": 1.0, "rewards/chosen": -0.13713771104812622, "rewards/margins": 0.02729925885796547, "rewards/rejected": -0.1644369661808014, "step": 665 }, { "epoch": 1.7560975609756098, "grad_norm": 10.591869354248047, "learning_rate": 3.3433895297249332e-06, "log_odds_chosen": 0.2349964678287506, "log_odds_ratio": -0.5956152677536011, "logits/chosen": -1.1156516075134277, "logits/rejected": -1.0448639392852783, "logps/chosen": -1.4059268236160278, "logps/rejected": -1.5864968299865723, "loss": 2.4255, "nll_loss": 0.5468214154243469, "rewards/accuracies": 0.625, "rewards/chosen": -0.14059269428253174, "rewards/margins": 0.018056996166706085, "rewards/rejected": -0.15864968299865723, "step": 666 }, { "epoch": 1.7587343441001977, "grad_norm": 9.876556396484375, "learning_rate": 3.336291038154392e-06, "log_odds_chosen": 0.42307838797569275, "log_odds_ratio": -0.524172306060791, "logits/chosen": -1.1576995849609375, "logits/rejected": -1.0660154819488525, "logps/chosen": -1.4507625102996826, "logps/rejected": -1.7858595848083496, "loss": 2.8506, "nll_loss": 0.6602423191070557, "rewards/accuracies": 0.875, "rewards/chosen": -0.14507625997066498, "rewards/margins": 0.03350970149040222, "rewards/rejected": -0.178585946559906, "step": 667 }, { "epoch": 1.7613711272247858, "grad_norm": 10.308706283569336, "learning_rate": 3.329192546583851e-06, "log_odds_chosen": 0.4553511142730713, "log_odds_ratio": -0.49892446398735046, "logits/chosen": -1.0694628953933716, "logits/rejected": -0.959567666053772, "logps/chosen": -1.423482060432434, "logps/rejected": -1.7801345586776733, "loss": 2.2942, "nll_loss": 0.5236632227897644, "rewards/accuracies": 1.0, "rewards/chosen": -0.14234821498394012, "rewards/margins": 0.03566524386405945, "rewards/rejected": -0.17801345884799957, "step": 668 }, { "epoch": 1.7640079103493738, "grad_norm": 9.855831146240234, "learning_rate": 3.3220940550133095e-06, "log_odds_chosen": 0.5339704751968384, "log_odds_ratio": -0.4648168087005615, "logits/chosen": -1.113582730293274, "logits/rejected": -0.9964568614959717, "logps/chosen": -1.4431257247924805, "logps/rejected": -1.8742222785949707, "loss": 2.4579, "nll_loss": 0.567988932132721, "rewards/accuracies": 1.0, "rewards/chosen": -0.1443125605583191, "rewards/margins": 0.04310966283082962, "rewards/rejected": -0.1874222308397293, "step": 669 }, { "epoch": 1.7666446934739617, "grad_norm": 9.270071983337402, "learning_rate": 3.3149955634427684e-06, "log_odds_chosen": 0.7656106352806091, "log_odds_ratio": -0.39729052782058716, "logits/chosen": -1.0145714282989502, "logits/rejected": -0.9721503257751465, "logps/chosen": -1.1446309089660645, "logps/rejected": -1.6687047481536865, "loss": 1.6998, "nll_loss": 0.3852230906486511, "rewards/accuracies": 1.0, "rewards/chosen": -0.11446309089660645, "rewards/margins": 0.0524073988199234, "rewards/rejected": -0.16687047481536865, "step": 670 }, { "epoch": 1.7692814765985498, "grad_norm": 9.544934272766113, "learning_rate": 3.3078970718722273e-06, "log_odds_chosen": 0.3408544063568115, "log_odds_ratio": -0.5713138580322266, "logits/chosen": -1.1565693616867065, "logits/rejected": -1.080590009689331, "logps/chosen": -1.2859318256378174, "logps/rejected": -1.5526658296585083, "loss": 2.1821, "nll_loss": 0.48839300870895386, "rewards/accuracies": 0.75, "rewards/chosen": -0.12859319150447845, "rewards/margins": 0.026673391461372375, "rewards/rejected": -0.15526658296585083, "step": 671 }, { "epoch": 1.7719182597231378, "grad_norm": 10.113846778869629, "learning_rate": 3.3007985803016858e-06, "log_odds_chosen": 0.46357882022857666, "log_odds_ratio": -0.5100479125976562, "logits/chosen": -1.1293085813522339, "logits/rejected": -1.0153019428253174, "logps/chosen": -1.4023265838623047, "logps/rejected": -1.7749102115631104, "loss": 2.3904, "nll_loss": 0.546592652797699, "rewards/accuracies": 0.875, "rewards/chosen": -0.14023266732692719, "rewards/margins": 0.03725834935903549, "rewards/rejected": -0.17749102413654327, "step": 672 }, { "epoch": 1.7745550428477257, "grad_norm": 10.42104434967041, "learning_rate": 3.2937000887311442e-06, "log_odds_chosen": 0.2633205056190491, "log_odds_ratio": -0.5774643421173096, "logits/chosen": -1.1776223182678223, "logits/rejected": -1.0979485511779785, "logps/chosen": -1.4247124195098877, "logps/rejected": -1.639265537261963, "loss": 3.2279, "nll_loss": 0.7492352724075317, "rewards/accuracies": 0.875, "rewards/chosen": -0.14247125387191772, "rewards/margins": 0.02145530842244625, "rewards/rejected": -0.16392655670642853, "step": 673 }, { "epoch": 1.7771918259723138, "grad_norm": 10.222952842712402, "learning_rate": 3.286601597160603e-06, "log_odds_chosen": 0.6649592518806458, "log_odds_ratio": -0.429875910282135, "logits/chosen": -1.1035183668136597, "logits/rejected": -0.9902955889701843, "logps/chosen": -1.2553927898406982, "logps/rejected": -1.788110375404358, "loss": 2.124, "nll_loss": 0.4880125820636749, "rewards/accuracies": 1.0, "rewards/chosen": -0.12553928792476654, "rewards/margins": 0.05327175557613373, "rewards/rejected": -0.17881104350090027, "step": 674 }, { "epoch": 1.7798286090969018, "grad_norm": 9.873235702514648, "learning_rate": 3.279503105590062e-06, "log_odds_chosen": 0.9217997193336487, "log_odds_ratio": -0.4081748425960541, "logits/chosen": -1.1465270519256592, "logits/rejected": -0.9990952014923096, "logps/chosen": -1.3967903852462769, "logps/rejected": -2.183521270751953, "loss": 2.67, "nll_loss": 0.6266850233078003, "rewards/accuracies": 0.875, "rewards/chosen": -0.13967904448509216, "rewards/margins": 0.07867306470870972, "rewards/rejected": -0.21835210919380188, "step": 675 }, { "epoch": 1.7824653922214897, "grad_norm": 10.12005615234375, "learning_rate": 3.2724046140195205e-06, "log_odds_chosen": 0.5252061486244202, "log_odds_ratio": -0.5032826662063599, "logits/chosen": -1.140293002128601, "logits/rejected": -1.0709006786346436, "logps/chosen": -1.2078694105148315, "logps/rejected": -1.5759236812591553, "loss": 2.4712, "nll_loss": 0.5674687623977661, "rewards/accuracies": 0.875, "rewards/chosen": -0.12078694254159927, "rewards/margins": 0.03680543974041939, "rewards/rejected": -0.15759238600730896, "step": 676 }, { "epoch": 1.7851021753460778, "grad_norm": 10.111419677734375, "learning_rate": 3.2653061224489794e-06, "log_odds_chosen": 0.7717751264572144, "log_odds_ratio": -0.3943026661872864, "logits/chosen": -1.1302120685577393, "logits/rejected": -1.0362975597381592, "logps/chosen": -1.3001060485839844, "logps/rejected": -1.917264699935913, "loss": 2.2049, "nll_loss": 0.5117934942245483, "rewards/accuracies": 1.0, "rewards/chosen": -0.13001060485839844, "rewards/margins": 0.06171587109565735, "rewards/rejected": -0.1917264759540558, "step": 677 }, { "epoch": 1.7877389584706658, "grad_norm": 10.019546508789062, "learning_rate": 3.2582076308784383e-06, "log_odds_chosen": 0.4805706739425659, "log_odds_ratio": -0.5003248453140259, "logits/chosen": -1.0562057495117188, "logits/rejected": -0.979279637336731, "logps/chosen": -1.4084614515304565, "logps/rejected": -1.7890034914016724, "loss": 2.2081, "nll_loss": 0.5019981861114502, "rewards/accuracies": 0.875, "rewards/chosen": -0.1408461481332779, "rewards/margins": 0.038054209202528, "rewards/rejected": -0.1789003610610962, "step": 678 }, { "epoch": 1.7903757415952537, "grad_norm": 9.820355415344238, "learning_rate": 3.2511091393078972e-06, "log_odds_chosen": 0.7213483452796936, "log_odds_ratio": -0.4192531704902649, "logits/chosen": -1.0981171131134033, "logits/rejected": -0.9715542793273926, "logps/chosen": -1.3471983671188354, "logps/rejected": -1.9289729595184326, "loss": 2.6386, "nll_loss": 0.6177271008491516, "rewards/accuracies": 1.0, "rewards/chosen": -0.1347198337316513, "rewards/margins": 0.05817745625972748, "rewards/rejected": -0.19289728999137878, "step": 679 }, { "epoch": 1.7930125247198418, "grad_norm": 10.302452087402344, "learning_rate": 3.2440106477373553e-06, "log_odds_chosen": 0.503058910369873, "log_odds_ratio": -0.4829460382461548, "logits/chosen": -1.03379225730896, "logits/rejected": -0.9386270046234131, "logps/chosen": -1.3021060228347778, "logps/rejected": -1.692925214767456, "loss": 2.4338, "nll_loss": 0.5601641535758972, "rewards/accuracies": 1.0, "rewards/chosen": -0.13021060824394226, "rewards/margins": 0.039081912487745285, "rewards/rejected": -0.16929250955581665, "step": 680 }, { "epoch": 1.7956493078444298, "grad_norm": 10.225312232971191, "learning_rate": 3.236912156166814e-06, "log_odds_chosen": 0.6208136677742004, "log_odds_ratio": -0.45568156242370605, "logits/chosen": -1.1496500968933105, "logits/rejected": -1.0104174613952637, "logps/chosen": -1.4223476648330688, "logps/rejected": -1.9421371221542358, "loss": 2.38, "nll_loss": 0.5494294762611389, "rewards/accuracies": 1.0, "rewards/chosen": -0.14223477244377136, "rewards/margins": 0.051978956907987595, "rewards/rejected": -0.19421373307704926, "step": 681 }, { "epoch": 1.7982860909690177, "grad_norm": 10.216060638427734, "learning_rate": 3.229813664596273e-06, "log_odds_chosen": 0.4453953504562378, "log_odds_ratio": -0.5002199411392212, "logits/chosen": -1.0342788696289062, "logits/rejected": -0.9758845567703247, "logps/chosen": -1.4600276947021484, "logps/rejected": -1.8262362480163574, "loss": 2.1552, "nll_loss": 0.4887891411781311, "rewards/accuracies": 1.0, "rewards/chosen": -0.14600275456905365, "rewards/margins": 0.03662087768316269, "rewards/rejected": -0.18262363970279694, "step": 682 }, { "epoch": 1.8009228740936059, "grad_norm": 9.598742485046387, "learning_rate": 3.222715173025732e-06, "log_odds_chosen": 0.7848517894744873, "log_odds_ratio": -0.3915405571460724, "logits/chosen": -1.111115574836731, "logits/rejected": -1.0194597244262695, "logps/chosen": -1.210547685623169, "logps/rejected": -1.796340823173523, "loss": 1.9571, "nll_loss": 0.450122594833374, "rewards/accuracies": 1.0, "rewards/chosen": -0.1210547685623169, "rewards/margins": 0.05857931450009346, "rewards/rejected": -0.17963406443595886, "step": 683 }, { "epoch": 1.8035596572181938, "grad_norm": 10.945537567138672, "learning_rate": 3.2156166814551904e-06, "log_odds_chosen": 0.5689574480056763, "log_odds_ratio": -0.4607033133506775, "logits/chosen": -1.1433016061782837, "logits/rejected": -1.083170771598816, "logps/chosen": -1.33827543258667, "logps/rejected": -1.7556018829345703, "loss": 2.7518, "nll_loss": 0.64188551902771, "rewards/accuracies": 1.0, "rewards/chosen": -0.13382753729820251, "rewards/margins": 0.04173264652490616, "rewards/rejected": -0.17556019127368927, "step": 684 }, { "epoch": 1.8061964403427817, "grad_norm": 11.310013771057129, "learning_rate": 3.2085181898846493e-06, "log_odds_chosen": 0.7163268327713013, "log_odds_ratio": -0.4253354072570801, "logits/chosen": -1.195902705192566, "logits/rejected": -1.0543447732925415, "logps/chosen": -1.698023796081543, "logps/rejected": -2.255995750427246, "loss": 3.9093, "nll_loss": 0.9347800016403198, "rewards/accuracies": 0.875, "rewards/chosen": -0.16980236768722534, "rewards/margins": 0.05579721927642822, "rewards/rejected": -0.22559958696365356, "step": 685 }, { "epoch": 1.8088332234673699, "grad_norm": 9.637194633483887, "learning_rate": 3.2014196983141082e-06, "log_odds_chosen": 0.6438184380531311, "log_odds_ratio": -0.4424562454223633, "logits/chosen": -1.0405304431915283, "logits/rejected": -0.9892370104789734, "logps/chosen": -1.1752492189407349, "logps/rejected": -1.665137767791748, "loss": 1.9299, "nll_loss": 0.43822696805000305, "rewards/accuracies": 1.0, "rewards/chosen": -0.11752492189407349, "rewards/margins": 0.04898886755108833, "rewards/rejected": -0.16651378571987152, "step": 686 }, { "epoch": 1.8114700065919578, "grad_norm": 9.509078979492188, "learning_rate": 3.194321206743567e-06, "log_odds_chosen": 0.8525099754333496, "log_odds_ratio": -0.41228288412094116, "logits/chosen": -1.0589873790740967, "logits/rejected": -0.970712423324585, "logps/chosen": -1.2163913249969482, "logps/rejected": -1.9108552932739258, "loss": 1.818, "nll_loss": 0.4132769703865051, "rewards/accuracies": 1.0, "rewards/chosen": -0.12163914740085602, "rewards/margins": 0.0694463849067688, "rewards/rejected": -0.19108553230762482, "step": 687 }, { "epoch": 1.8141067897165457, "grad_norm": 10.51211166381836, "learning_rate": 3.1872227151730256e-06, "log_odds_chosen": 0.5122712850570679, "log_odds_ratio": -0.49355193972587585, "logits/chosen": -1.0940035581588745, "logits/rejected": -1.0186409950256348, "logps/chosen": -1.4527509212493896, "logps/rejected": -1.883984923362732, "loss": 2.2886, "nll_loss": 0.522806704044342, "rewards/accuracies": 0.875, "rewards/chosen": -0.14527510106563568, "rewards/margins": 0.04312339052557945, "rewards/rejected": -0.18839848041534424, "step": 688 }, { "epoch": 1.8167435728411339, "grad_norm": 10.046796798706055, "learning_rate": 3.180124223602484e-06, "log_odds_chosen": 0.5925272703170776, "log_odds_ratio": -0.4454019069671631, "logits/chosen": -1.0761380195617676, "logits/rejected": -0.9809951782226562, "logps/chosen": -1.2984421253204346, "logps/rejected": -1.7660629749298096, "loss": 2.0259, "nll_loss": 0.46194547414779663, "rewards/accuracies": 1.0, "rewards/chosen": -0.12984421849250793, "rewards/margins": 0.04676208272576332, "rewards/rejected": -0.17660629749298096, "step": 689 }, { "epoch": 1.8193803559657218, "grad_norm": 9.937018394470215, "learning_rate": 3.173025732031943e-06, "log_odds_chosen": 0.5163881778717041, "log_odds_ratio": -0.49798330664634705, "logits/chosen": -1.1294372081756592, "logits/rejected": -1.052258014678955, "logps/chosen": -1.21603262424469, "logps/rejected": -1.6259233951568604, "loss": 2.3316, "nll_loss": 0.5330907106399536, "rewards/accuracies": 0.875, "rewards/chosen": -0.12160325795412064, "rewards/margins": 0.04098907858133316, "rewards/rejected": -0.1625923365354538, "step": 690 }, { "epoch": 1.8220171390903097, "grad_norm": 9.205368995666504, "learning_rate": 3.165927240461402e-06, "log_odds_chosen": 0.6621567010879517, "log_odds_ratio": -0.4355185329914093, "logits/chosen": -1.142890453338623, "logits/rejected": -1.0354702472686768, "logps/chosen": -1.113405704498291, "logps/rejected": -1.6204802989959717, "loss": 1.9372, "nll_loss": 0.44075143337249756, "rewards/accuracies": 0.875, "rewards/chosen": -0.11134056746959686, "rewards/margins": 0.050707463175058365, "rewards/rejected": -0.16204802691936493, "step": 691 }, { "epoch": 1.8246539222148979, "grad_norm": 10.018095970153809, "learning_rate": 3.1588287488908604e-06, "log_odds_chosen": 1.0003513097763062, "log_odds_ratio": -0.37271711230278015, "logits/chosen": -1.0820488929748535, "logits/rejected": -0.9637270569801331, "logps/chosen": -1.327383279800415, "logps/rejected": -2.162868022918701, "loss": 1.9861, "nll_loss": 0.4592553675174713, "rewards/accuracies": 1.0, "rewards/chosen": -0.13273833692073822, "rewards/margins": 0.08354848623275757, "rewards/rejected": -0.2162868231534958, "step": 692 }, { "epoch": 1.8272907053394858, "grad_norm": 10.114798545837402, "learning_rate": 3.1517302573203193e-06, "log_odds_chosen": 0.6528754234313965, "log_odds_ratio": -0.43074536323547363, "logits/chosen": -1.0850951671600342, "logits/rejected": -1.0183665752410889, "logps/chosen": -1.5209827423095703, "logps/rejected": -2.047553539276123, "loss": 2.368, "nll_loss": 0.5489242672920227, "rewards/accuracies": 1.0, "rewards/chosen": -0.15209826827049255, "rewards/margins": 0.052657097578048706, "rewards/rejected": -0.20475536584854126, "step": 693 }, { "epoch": 1.8299274884640737, "grad_norm": 10.068489074707031, "learning_rate": 3.144631765749778e-06, "log_odds_chosen": 0.4577704966068268, "log_odds_ratio": -0.50062495470047, "logits/chosen": -1.0895400047302246, "logits/rejected": -0.971296489238739, "logps/chosen": -1.5363192558288574, "logps/rejected": -1.910767912864685, "loss": 2.5335, "nll_loss": 0.5833237171173096, "rewards/accuracies": 1.0, "rewards/chosen": -0.15363194048404694, "rewards/margins": 0.037444859743118286, "rewards/rejected": -0.19107678532600403, "step": 694 }, { "epoch": 1.8325642715886619, "grad_norm": 10.53513240814209, "learning_rate": 3.1375332741792366e-06, "log_odds_chosen": 0.3243619203567505, "log_odds_ratio": -0.5468412041664124, "logits/chosen": -1.164198637008667, "logits/rejected": -1.0807690620422363, "logps/chosen": -1.4449284076690674, "logps/rejected": -1.705177903175354, "loss": 2.6796, "nll_loss": 0.6152034997940063, "rewards/accuracies": 1.0, "rewards/chosen": -0.14449283480644226, "rewards/margins": 0.0260249562561512, "rewards/rejected": -0.17051780223846436, "step": 695 }, { "epoch": 1.8352010547132498, "grad_norm": 9.664958000183105, "learning_rate": 3.1304347826086955e-06, "log_odds_chosen": 0.451429158449173, "log_odds_ratio": -0.5083733201026917, "logits/chosen": -1.153422474861145, "logits/rejected": -1.094710111618042, "logps/chosen": -1.4420045614242554, "logps/rejected": -1.8038504123687744, "loss": 2.3653, "nll_loss": 0.540488600730896, "rewards/accuracies": 0.75, "rewards/chosen": -0.14420045912265778, "rewards/margins": 0.03618457168340683, "rewards/rejected": -0.1803850382566452, "step": 696 }, { "epoch": 1.8378378378378377, "grad_norm": 10.225156784057617, "learning_rate": 3.1233362910381544e-06, "log_odds_chosen": 0.2400350570678711, "log_odds_ratio": -0.5829436182975769, "logits/chosen": -1.1770219802856445, "logits/rejected": -1.1034917831420898, "logps/chosen": -1.3735429048538208, "logps/rejected": -1.5605123043060303, "loss": 2.5206, "nll_loss": 0.5718554258346558, "rewards/accuracies": 0.875, "rewards/chosen": -0.1373542845249176, "rewards/margins": 0.018696939572691917, "rewards/rejected": -0.15605121850967407, "step": 697 }, { "epoch": 1.8404746209624259, "grad_norm": 9.104494094848633, "learning_rate": 3.116237799467613e-06, "log_odds_chosen": 1.035887360572815, "log_odds_ratio": -0.4015609323978424, "logits/chosen": -1.1364988088607788, "logits/rejected": -1.0374042987823486, "logps/chosen": -1.1948455572128296, "logps/rejected": -1.992734432220459, "loss": 2.1548, "nll_loss": 0.4985518157482147, "rewards/accuracies": 1.0, "rewards/chosen": -0.1194845661520958, "rewards/margins": 0.07978887856006622, "rewards/rejected": -0.19927343726158142, "step": 698 }, { "epoch": 1.8431114040870138, "grad_norm": 9.683701515197754, "learning_rate": 3.1091393078970714e-06, "log_odds_chosen": 0.2705000042915344, "log_odds_ratio": -0.577082633972168, "logits/chosen": -1.0815887451171875, "logits/rejected": -1.0369924306869507, "logps/chosen": -1.2282958030700684, "logps/rejected": -1.437868356704712, "loss": 1.8773, "nll_loss": 0.4116290807723999, "rewards/accuracies": 0.875, "rewards/chosen": -0.12282958626747131, "rewards/margins": 0.020957253873348236, "rewards/rejected": -0.14378683269023895, "step": 699 }, { "epoch": 1.8457481872116017, "grad_norm": 9.654878616333008, "learning_rate": 3.1020408163265303e-06, "log_odds_chosen": 0.5054506063461304, "log_odds_ratio": -0.5001856684684753, "logits/chosen": -1.1152770519256592, "logits/rejected": -1.0579036474227905, "logps/chosen": -1.1892142295837402, "logps/rejected": -1.5673846006393433, "loss": 1.9948, "nll_loss": 0.44868773221969604, "rewards/accuracies": 0.875, "rewards/chosen": -0.11892141401767731, "rewards/margins": 0.037817053496837616, "rewards/rejected": -0.15673847496509552, "step": 700 }, { "epoch": 1.8483849703361899, "grad_norm": 9.699676513671875, "learning_rate": 3.094942324755989e-06, "log_odds_chosen": 0.6631026268005371, "log_odds_ratio": -0.4391542971134186, "logits/chosen": -1.0755290985107422, "logits/rejected": -0.9599955677986145, "logps/chosen": -1.3520907163619995, "logps/rejected": -1.8745989799499512, "loss": 2.0716, "nll_loss": 0.4739803969860077, "rewards/accuracies": 0.875, "rewards/chosen": -0.13520905375480652, "rewards/margins": 0.05225083604454994, "rewards/rejected": -0.18745990097522736, "step": 701 }, { "epoch": 1.8510217534607778, "grad_norm": 10.442801475524902, "learning_rate": 3.087843833185448e-06, "log_odds_chosen": 0.5106677412986755, "log_odds_ratio": -0.4874875545501709, "logits/chosen": -1.146780014038086, "logits/rejected": -1.0521918535232544, "logps/chosen": -1.4891130924224854, "logps/rejected": -1.9028270244598389, "loss": 2.6028, "nll_loss": 0.6019536256790161, "rewards/accuracies": 0.75, "rewards/chosen": -0.14891131222248077, "rewards/margins": 0.04137139022350311, "rewards/rejected": -0.1902827024459839, "step": 702 }, { "epoch": 1.8536585365853657, "grad_norm": 8.870477676391602, "learning_rate": 3.0807453416149066e-06, "log_odds_chosen": 0.824571967124939, "log_odds_ratio": -0.4329356551170349, "logits/chosen": -1.0489051342010498, "logits/rejected": -0.9817217588424683, "logps/chosen": -1.0155425071716309, "logps/rejected": -1.557959794998169, "loss": 1.6078, "nll_loss": 0.35866761207580566, "rewards/accuracies": 1.0, "rewards/chosen": -0.1015542522072792, "rewards/margins": 0.054241739213466644, "rewards/rejected": -0.15579599142074585, "step": 703 }, { "epoch": 1.8562953197099539, "grad_norm": 9.754107475280762, "learning_rate": 3.0736468500443655e-06, "log_odds_chosen": 0.5776047110557556, "log_odds_ratio": -0.46575939655303955, "logits/chosen": -1.1001160144805908, "logits/rejected": -0.9796845316886902, "logps/chosen": -1.4508471488952637, "logps/rejected": -1.9013798236846924, "loss": 2.2456, "nll_loss": 0.5148159861564636, "rewards/accuracies": 1.0, "rewards/chosen": -0.14508472383022308, "rewards/margins": 0.04505325108766556, "rewards/rejected": -0.19013796746730804, "step": 704 }, { "epoch": 1.858932102834542, "grad_norm": 10.134719848632812, "learning_rate": 3.0665483584738244e-06, "log_odds_chosen": 0.4445488750934601, "log_odds_ratio": -0.5248530507087708, "logits/chosen": -1.2013803720474243, "logits/rejected": -1.0629832744598389, "logps/chosen": -1.440731167793274, "logps/rejected": -1.7891690731048584, "loss": 2.807, "nll_loss": 0.649268388748169, "rewards/accuracies": 0.875, "rewards/chosen": -0.14407309889793396, "rewards/margins": 0.034843798726797104, "rewards/rejected": -0.17891690135002136, "step": 705 }, { "epoch": 1.8615688859591297, "grad_norm": 9.605047225952148, "learning_rate": 3.0594498669032833e-06, "log_odds_chosen": 0.6526737809181213, "log_odds_ratio": -0.43708115816116333, "logits/chosen": -1.1206607818603516, "logits/rejected": -0.9992779493331909, "logps/chosen": -1.1988224983215332, "logps/rejected": -1.6805169582366943, "loss": 2.0354, "nll_loss": 0.46514028310775757, "rewards/accuracies": 0.875, "rewards/chosen": -0.1198822557926178, "rewards/margins": 0.048169441521167755, "rewards/rejected": -0.16805168986320496, "step": 706 }, { "epoch": 1.8642056690837179, "grad_norm": 10.393851280212402, "learning_rate": 3.0523513753327413e-06, "log_odds_chosen": 0.3795328438282013, "log_odds_ratio": -0.536065399646759, "logits/chosen": -1.0506230592727661, "logits/rejected": -0.9675607085227966, "logps/chosen": -1.3011157512664795, "logps/rejected": -1.5858736038208008, "loss": 2.5173, "nll_loss": 0.5757268071174622, "rewards/accuracies": 0.875, "rewards/chosen": -0.13011157512664795, "rewards/margins": 0.02847578376531601, "rewards/rejected": -0.15858736634254456, "step": 707 }, { "epoch": 1.866842452208306, "grad_norm": 10.673075675964355, "learning_rate": 3.0452528837622002e-06, "log_odds_chosen": 0.4388372600078583, "log_odds_ratio": -0.5036283731460571, "logits/chosen": -1.0832300186157227, "logits/rejected": -0.9819395542144775, "logps/chosen": -1.3873385190963745, "logps/rejected": -1.731518268585205, "loss": 2.5007, "nll_loss": 0.5748181343078613, "rewards/accuracies": 1.0, "rewards/chosen": -0.1387338638305664, "rewards/margins": 0.03441796079277992, "rewards/rejected": -0.17315182089805603, "step": 708 }, { "epoch": 1.8694792353328937, "grad_norm": 10.285957336425781, "learning_rate": 3.038154392191659e-06, "log_odds_chosen": 0.4851961135864258, "log_odds_ratio": -0.4922325015068054, "logits/chosen": -1.1458088159561157, "logits/rejected": -1.0482499599456787, "logps/chosen": -1.318127989768982, "logps/rejected": -1.6883344650268555, "loss": 2.3074, "nll_loss": 0.5276387929916382, "rewards/accuracies": 1.0, "rewards/chosen": -0.13181281089782715, "rewards/margins": 0.03702065348625183, "rewards/rejected": -0.16883346438407898, "step": 709 }, { "epoch": 1.8721160184574819, "grad_norm": 9.493169784545898, "learning_rate": 3.031055900621118e-06, "log_odds_chosen": 0.551567554473877, "log_odds_ratio": -0.47243914008140564, "logits/chosen": -1.0503392219543457, "logits/rejected": -1.012966275215149, "logps/chosen": -1.3903131484985352, "logps/rejected": -1.8147319555282593, "loss": 1.9649, "nll_loss": 0.4439811706542969, "rewards/accuracies": 1.0, "rewards/chosen": -0.139031320810318, "rewards/margins": 0.04244187846779823, "rewards/rejected": -0.18147319555282593, "step": 710 }, { "epoch": 1.87475280158207, "grad_norm": 10.9459867477417, "learning_rate": 3.0239574090505765e-06, "log_odds_chosen": 0.4790281057357788, "log_odds_ratio": -0.49135541915893555, "logits/chosen": -1.0996241569519043, "logits/rejected": -1.0557196140289307, "logps/chosen": -1.4130548238754272, "logps/rejected": -1.7887698411941528, "loss": 2.4598, "nll_loss": 0.5658220052719116, "rewards/accuracies": 1.0, "rewards/chosen": -0.14130547642707825, "rewards/margins": 0.0375715047121048, "rewards/rejected": -0.17887699604034424, "step": 711 }, { "epoch": 1.8773895847066577, "grad_norm": 9.297354698181152, "learning_rate": 3.0168589174800354e-06, "log_odds_chosen": 0.4402661621570587, "log_odds_ratio": -0.5122517943382263, "logits/chosen": -1.086590051651001, "logits/rejected": -1.048518180847168, "logps/chosen": -1.2138535976409912, "logps/rejected": -1.5244641304016113, "loss": 2.0503, "nll_loss": 0.4613535404205322, "rewards/accuracies": 0.875, "rewards/chosen": -0.1213853657245636, "rewards/margins": 0.031061064451932907, "rewards/rejected": -0.1524464190006256, "step": 712 }, { "epoch": 1.8800263678312459, "grad_norm": 9.646504402160645, "learning_rate": 3.0097604259094943e-06, "log_odds_chosen": 0.38965076208114624, "log_odds_ratio": -0.5235826373100281, "logits/chosen": -1.107448697090149, "logits/rejected": -1.0148836374282837, "logps/chosen": -1.3409647941589355, "logps/rejected": -1.6361957788467407, "loss": 2.3443, "nll_loss": 0.5337234735488892, "rewards/accuracies": 1.0, "rewards/chosen": -0.13409648835659027, "rewards/margins": 0.029523085802793503, "rewards/rejected": -0.16361957788467407, "step": 713 }, { "epoch": 1.882663150955834, "grad_norm": 11.009034156799316, "learning_rate": 3.0026619343389528e-06, "log_odds_chosen": 0.530620813369751, "log_odds_ratio": -0.47252893447875977, "logits/chosen": -1.1298047304153442, "logits/rejected": -1.0366967916488647, "logps/chosen": -1.4072208404541016, "logps/rejected": -1.830824375152588, "loss": 2.446, "nll_loss": 0.5642455816268921, "rewards/accuracies": 0.875, "rewards/chosen": -0.14072206616401672, "rewards/margins": 0.04236038029193878, "rewards/rejected": -0.1830824315547943, "step": 714 }, { "epoch": 1.8852999340804217, "grad_norm": 9.541563034057617, "learning_rate": 2.9955634427684117e-06, "log_odds_chosen": 0.6051095724105835, "log_odds_ratio": -0.4559980034828186, "logits/chosen": -1.060058355331421, "logits/rejected": -0.9675153493881226, "logps/chosen": -1.272647738456726, "logps/rejected": -1.7453210353851318, "loss": 1.9082, "nll_loss": 0.43144115805625916, "rewards/accuracies": 0.875, "rewards/chosen": -0.12726476788520813, "rewards/margins": 0.04726734012365341, "rewards/rejected": -0.17453211545944214, "step": 715 }, { "epoch": 1.8879367172050099, "grad_norm": 9.738765716552734, "learning_rate": 2.98846495119787e-06, "log_odds_chosen": 0.5946243405342102, "log_odds_ratio": -0.4913176894187927, "logits/chosen": -1.168287754058838, "logits/rejected": -1.0590921640396118, "logps/chosen": -1.27141273021698, "logps/rejected": -1.6295111179351807, "loss": 2.5759, "nll_loss": 0.5948377847671509, "rewards/accuracies": 1.0, "rewards/chosen": -0.12714126706123352, "rewards/margins": 0.035809844732284546, "rewards/rejected": -0.16295112669467926, "step": 716 }, { "epoch": 1.890573500329598, "grad_norm": 9.53346061706543, "learning_rate": 2.981366459627329e-06, "log_odds_chosen": 0.8909972906112671, "log_odds_ratio": -0.3848392367362976, "logits/chosen": -1.010292410850525, "logits/rejected": -0.9597340822219849, "logps/chosen": -1.180947184562683, "logps/rejected": -1.8466895818710327, "loss": 2.1973, "nll_loss": 0.5108322501182556, "rewards/accuracies": 0.875, "rewards/chosen": -0.11809471994638443, "rewards/margins": 0.06657424569129944, "rewards/rejected": -0.18466898798942566, "step": 717 }, { "epoch": 1.8932102834541857, "grad_norm": 9.259313583374023, "learning_rate": 2.9742679680567875e-06, "log_odds_chosen": 0.8006938695907593, "log_odds_ratio": -0.37917834520339966, "logits/chosen": -1.073501706123352, "logits/rejected": -1.0082961320877075, "logps/chosen": -1.2161955833435059, "logps/rejected": -1.83875572681427, "loss": 1.7586, "nll_loss": 0.40172743797302246, "rewards/accuracies": 1.0, "rewards/chosen": -0.12161955237388611, "rewards/margins": 0.06225602328777313, "rewards/rejected": -0.18387556076049805, "step": 718 }, { "epoch": 1.8958470665787739, "grad_norm": 9.879798889160156, "learning_rate": 2.9671694764862464e-06, "log_odds_chosen": 0.7014466524124146, "log_odds_ratio": -0.4430071711540222, "logits/chosen": -1.1105573177337646, "logits/rejected": -0.9432505965232849, "logps/chosen": -1.3722140789031982, "logps/rejected": -1.937282919883728, "loss": 2.2508, "nll_loss": 0.5183901190757751, "rewards/accuracies": 0.75, "rewards/chosen": -0.13722142577171326, "rewards/margins": 0.056506864726543427, "rewards/rejected": -0.1937282830476761, "step": 719 }, { "epoch": 1.898483849703362, "grad_norm": 9.9389009475708, "learning_rate": 2.9600709849157053e-06, "log_odds_chosen": 0.7960854768753052, "log_odds_ratio": -0.39902588725090027, "logits/chosen": -1.1237053871154785, "logits/rejected": -1.043378233909607, "logps/chosen": -1.2699528932571411, "logps/rejected": -1.8459627628326416, "loss": 2.1568, "nll_loss": 0.4992954134941101, "rewards/accuracies": 1.0, "rewards/chosen": -0.1269952952861786, "rewards/margins": 0.05760098248720169, "rewards/rejected": -0.18459627032279968, "step": 720 }, { "epoch": 1.9011206328279497, "grad_norm": 9.857022285461426, "learning_rate": 2.9529724933451642e-06, "log_odds_chosen": 0.6493891477584839, "log_odds_ratio": -0.4351784586906433, "logits/chosen": -1.1531649827957153, "logits/rejected": -1.0092952251434326, "logps/chosen": -1.3989733457565308, "logps/rejected": -1.9283164739608765, "loss": 2.581, "nll_loss": 0.6017433404922485, "rewards/accuracies": 1.0, "rewards/chosen": -0.13989733159542084, "rewards/margins": 0.05293431133031845, "rewards/rejected": -0.19283165037631989, "step": 721 }, { "epoch": 1.903757415952538, "grad_norm": 9.490763664245605, "learning_rate": 2.9458740017746227e-06, "log_odds_chosen": 0.5713068842887878, "log_odds_ratio": -0.4703507423400879, "logits/chosen": -1.1523844003677368, "logits/rejected": -0.9938646554946899, "logps/chosen": -1.1879104375839233, "logps/rejected": -1.6090285778045654, "loss": 2.2061, "nll_loss": 0.5044786334037781, "rewards/accuracies": 1.0, "rewards/chosen": -0.11879104375839233, "rewards/margins": 0.04211181402206421, "rewards/rejected": -0.16090285778045654, "step": 722 }, { "epoch": 1.906394199077126, "grad_norm": 10.344840049743652, "learning_rate": 2.9387755102040816e-06, "log_odds_chosen": 0.6381205320358276, "log_odds_ratio": -0.4392378032207489, "logits/chosen": -1.1031138896942139, "logits/rejected": -0.9807515144348145, "logps/chosen": -1.3628013134002686, "logps/rejected": -1.865041971206665, "loss": 2.6738, "nll_loss": 0.6245163083076477, "rewards/accuracies": 0.875, "rewards/chosen": -0.1362801492214203, "rewards/margins": 0.050224047154188156, "rewards/rejected": -0.18650420010089874, "step": 723 }, { "epoch": 1.9090309822017137, "grad_norm": 10.27442741394043, "learning_rate": 2.9316770186335405e-06, "log_odds_chosen": 0.5367138981819153, "log_odds_ratio": -0.47964826226234436, "logits/chosen": -1.150720477104187, "logits/rejected": -1.040562629699707, "logps/chosen": -1.2588378190994263, "logps/rejected": -1.6655387878417969, "loss": 2.4424, "nll_loss": 0.5626363754272461, "rewards/accuracies": 1.0, "rewards/chosen": -0.1258837878704071, "rewards/margins": 0.04067010432481766, "rewards/rejected": -0.16655388474464417, "step": 724 }, { "epoch": 1.911667765326302, "grad_norm": 9.781803131103516, "learning_rate": 2.924578527062999e-06, "log_odds_chosen": 0.3105473220348358, "log_odds_ratio": -0.5560980439186096, "logits/chosen": -1.1701128482818604, "logits/rejected": -1.060251235961914, "logps/chosen": -1.3494994640350342, "logps/rejected": -1.5901248455047607, "loss": 2.7193, "nll_loss": 0.6242145299911499, "rewards/accuracies": 0.875, "rewards/chosen": -0.1349499225616455, "rewards/margins": 0.024062547832727432, "rewards/rejected": -0.15901248157024384, "step": 725 }, { "epoch": 1.91430454845089, "grad_norm": 9.695030212402344, "learning_rate": 2.9174800354924575e-06, "log_odds_chosen": 0.5412957072257996, "log_odds_ratio": -0.4844801723957062, "logits/chosen": -1.1531310081481934, "logits/rejected": -1.0898048877716064, "logps/chosen": -1.2744779586791992, "logps/rejected": -1.6676826477050781, "loss": 2.3628, "nll_loss": 0.5422565937042236, "rewards/accuracies": 1.0, "rewards/chosen": -0.12744779884815216, "rewards/margins": 0.039320461452007294, "rewards/rejected": -0.16676826775074005, "step": 726 }, { "epoch": 1.916941331575478, "grad_norm": 10.185785293579102, "learning_rate": 2.9103815439219164e-06, "log_odds_chosen": 0.5890101790428162, "log_odds_ratio": -0.44325900077819824, "logits/chosen": -1.1804449558258057, "logits/rejected": -0.9826102256774902, "logps/chosen": -1.356105089187622, "logps/rejected": -1.8139309883117676, "loss": 2.7428, "nll_loss": 0.6413748264312744, "rewards/accuracies": 1.0, "rewards/chosen": -0.13561052083969116, "rewards/margins": 0.045782577246427536, "rewards/rejected": -0.1813930869102478, "step": 727 }, { "epoch": 1.919578114700066, "grad_norm": 9.88233470916748, "learning_rate": 2.9032830523513753e-06, "log_odds_chosen": 0.688329815864563, "log_odds_ratio": -0.418424129486084, "logits/chosen": -1.0789997577667236, "logits/rejected": -1.004409909248352, "logps/chosen": -1.2447686195373535, "logps/rejected": -1.7736945152282715, "loss": 2.4874, "nll_loss": 0.5799974203109741, "rewards/accuracies": 1.0, "rewards/chosen": -0.12447687238454819, "rewards/margins": 0.052892591804265976, "rewards/rejected": -0.17736944556236267, "step": 728 }, { "epoch": 1.922214897824654, "grad_norm": 10.790196418762207, "learning_rate": 2.896184560780834e-06, "log_odds_chosen": 0.45303723216056824, "log_odds_ratio": -0.4973563551902771, "logits/chosen": -1.0952262878417969, "logits/rejected": -1.0493183135986328, "logps/chosen": -1.3963035345077515, "logps/rejected": -1.7568044662475586, "loss": 2.3031, "nll_loss": 0.5260452032089233, "rewards/accuracies": 1.0, "rewards/chosen": -0.13963034749031067, "rewards/margins": 0.03605009987950325, "rewards/rejected": -0.17568045854568481, "step": 729 }, { "epoch": 1.924851680949242, "grad_norm": 10.839725494384766, "learning_rate": 2.8890860692102926e-06, "log_odds_chosen": 0.5385131239891052, "log_odds_ratio": -0.47357553243637085, "logits/chosen": -1.1808533668518066, "logits/rejected": -1.045129418373108, "logps/chosen": -1.469390869140625, "logps/rejected": -1.9083622694015503, "loss": 2.7756, "nll_loss": 0.6465520262718201, "rewards/accuracies": 1.0, "rewards/chosen": -0.14693908393383026, "rewards/margins": 0.04389715567231178, "rewards/rejected": -0.19083623588085175, "step": 730 }, { "epoch": 1.92748846407383, "grad_norm": 9.984872817993164, "learning_rate": 2.8819875776397515e-06, "log_odds_chosen": 0.6364855170249939, "log_odds_ratio": -0.4419393539428711, "logits/chosen": -1.059470534324646, "logits/rejected": -1.0081063508987427, "logps/chosen": -1.362449049949646, "logps/rejected": -1.8734867572784424, "loss": 2.1205, "nll_loss": 0.48593536019325256, "rewards/accuracies": 1.0, "rewards/chosen": -0.13624490797519684, "rewards/margins": 0.051103778183460236, "rewards/rejected": -0.18734869360923767, "step": 731 }, { "epoch": 1.930125247198418, "grad_norm": 10.651154518127441, "learning_rate": 2.8748890860692104e-06, "log_odds_chosen": 0.5924453735351562, "log_odds_ratio": -0.4516263008117676, "logits/chosen": -1.1864819526672363, "logits/rejected": -1.0519362688064575, "logps/chosen": -1.2969294786453247, "logps/rejected": -1.7565484046936035, "loss": 2.4514, "nll_loss": 0.5676829814910889, "rewards/accuracies": 1.0, "rewards/chosen": -0.129692941904068, "rewards/margins": 0.0459618978202343, "rewards/rejected": -0.1756548434495926, "step": 732 }, { "epoch": 1.932762030323006, "grad_norm": 10.41089153289795, "learning_rate": 2.867790594498669e-06, "log_odds_chosen": 0.285758912563324, "log_odds_ratio": -0.5712098479270935, "logits/chosen": -1.2113354206085205, "logits/rejected": -1.1036522388458252, "logps/chosen": -1.4044160842895508, "logps/rejected": -1.6196887493133545, "loss": 2.7402, "nll_loss": 0.6279230117797852, "rewards/accuracies": 0.625, "rewards/chosen": -0.14044161140918732, "rewards/margins": 0.021527256816625595, "rewards/rejected": -0.1619688868522644, "step": 733 }, { "epoch": 1.935398813447594, "grad_norm": 11.022461891174316, "learning_rate": 2.8606921029281274e-06, "log_odds_chosen": 0.5275789499282837, "log_odds_ratio": -0.4706922173500061, "logits/chosen": -1.21927011013031, "logits/rejected": -1.0647447109222412, "logps/chosen": -1.4133145809173584, "logps/rejected": -1.8368372917175293, "loss": 2.6767, "nll_loss": 0.6221182942390442, "rewards/accuracies": 1.0, "rewards/chosen": -0.14133146405220032, "rewards/margins": 0.04235227406024933, "rewards/rejected": -0.18368372321128845, "step": 734 }, { "epoch": 1.938035596572182, "grad_norm": 9.407811164855957, "learning_rate": 2.8535936113575863e-06, "log_odds_chosen": 0.441721647977829, "log_odds_ratio": -0.5065779685974121, "logits/chosen": -1.0536956787109375, "logits/rejected": -0.9907861948013306, "logps/chosen": -1.1991026401519775, "logps/rejected": -1.5222476720809937, "loss": 2.0497, "nll_loss": 0.4617636799812317, "rewards/accuracies": 1.0, "rewards/chosen": -0.11991026252508163, "rewards/margins": 0.032314497977495193, "rewards/rejected": -0.15222477912902832, "step": 735 }, { "epoch": 1.94067237969677, "grad_norm": 9.999675750732422, "learning_rate": 2.846495119787045e-06, "log_odds_chosen": 0.6899769306182861, "log_odds_ratio": -0.41962409019470215, "logits/chosen": -1.1290652751922607, "logits/rejected": -1.0376579761505127, "logps/chosen": -1.4229869842529297, "logps/rejected": -1.9804730415344238, "loss": 2.2811, "nll_loss": 0.528308629989624, "rewards/accuracies": 1.0, "rewards/chosen": -0.14229869842529297, "rewards/margins": 0.0557485856115818, "rewards/rejected": -0.19804728031158447, "step": 736 }, { "epoch": 1.943309162821358, "grad_norm": 10.267180442810059, "learning_rate": 2.8393966282165037e-06, "log_odds_chosen": 0.5577402710914612, "log_odds_ratio": -0.46777093410491943, "logits/chosen": -1.0950483083724976, "logits/rejected": -1.0081474781036377, "logps/chosen": -1.3645390272140503, "logps/rejected": -1.7952313423156738, "loss": 2.704, "nll_loss": 0.6292195320129395, "rewards/accuracies": 0.875, "rewards/chosen": -0.13645391166210175, "rewards/margins": 0.043069228529930115, "rewards/rejected": -0.17952314019203186, "step": 737 }, { "epoch": 1.945945945945946, "grad_norm": 9.377684593200684, "learning_rate": 2.8322981366459626e-06, "log_odds_chosen": 0.43222254514694214, "log_odds_ratio": -0.5103168487548828, "logits/chosen": -1.0828691720962524, "logits/rejected": -1.0081210136413574, "logps/chosen": -1.3563244342803955, "logps/rejected": -1.7054436206817627, "loss": 1.9605, "nll_loss": 0.4390990138053894, "rewards/accuracies": 1.0, "rewards/chosen": -0.13563242554664612, "rewards/margins": 0.03491192311048508, "rewards/rejected": -0.1705443561077118, "step": 738 }, { "epoch": 1.948582729070534, "grad_norm": 9.630548477172852, "learning_rate": 2.8251996450754215e-06, "log_odds_chosen": 0.634854793548584, "log_odds_ratio": -0.4447363317012787, "logits/chosen": -1.116487741470337, "logits/rejected": -0.9982193112373352, "logps/chosen": -1.2106181383132935, "logps/rejected": -1.732189655303955, "loss": 2.1607, "nll_loss": 0.495696485042572, "rewards/accuracies": 0.875, "rewards/chosen": -0.12106182426214218, "rewards/margins": 0.052157141268253326, "rewards/rejected": -0.1732189655303955, "step": 739 }, { "epoch": 1.951219512195122, "grad_norm": 9.25537109375, "learning_rate": 2.8181011535048804e-06, "log_odds_chosen": 0.5245146751403809, "log_odds_ratio": -0.4931119382381439, "logits/chosen": -1.1068140268325806, "logits/rejected": -1.0467549562454224, "logps/chosen": -1.3359605073928833, "logps/rejected": -1.7490854263305664, "loss": 1.8681, "nll_loss": 0.4177016019821167, "rewards/accuracies": 0.875, "rewards/chosen": -0.1335960477590561, "rewards/margins": 0.04131249338388443, "rewards/rejected": -0.17490854859352112, "step": 740 }, { "epoch": 1.95385629531971, "grad_norm": 9.554231643676758, "learning_rate": 2.811002661934339e-06, "log_odds_chosen": 0.5873994827270508, "log_odds_ratio": -0.4573465585708618, "logits/chosen": -1.1190392971038818, "logits/rejected": -1.0469231605529785, "logps/chosen": -1.2938807010650635, "logps/rejected": -1.757972002029419, "loss": 2.1344, "nll_loss": 0.48787564039230347, "rewards/accuracies": 1.0, "rewards/chosen": -0.12938806414604187, "rewards/margins": 0.046409137547016144, "rewards/rejected": -0.1757972091436386, "step": 741 }, { "epoch": 1.956493078444298, "grad_norm": 10.868066787719727, "learning_rate": 2.8039041703637977e-06, "log_odds_chosen": 0.4728792905807495, "log_odds_ratio": -0.49270740151405334, "logits/chosen": -1.1300780773162842, "logits/rejected": -1.0287731885910034, "logps/chosen": -1.4597746133804321, "logps/rejected": -1.8425248861312866, "loss": 2.4114, "nll_loss": 0.5535825490951538, "rewards/accuracies": 0.875, "rewards/chosen": -0.1459774672985077, "rewards/margins": 0.03827501833438873, "rewards/rejected": -0.18425247073173523, "step": 742 }, { "epoch": 1.959129861568886, "grad_norm": 9.725842475891113, "learning_rate": 2.796805678793256e-06, "log_odds_chosen": 0.6654451489448547, "log_odds_ratio": -0.4607689082622528, "logits/chosen": -1.1382243633270264, "logits/rejected": -1.0733942985534668, "logps/chosen": -1.2684041261672974, "logps/rejected": -1.785618543624878, "loss": 2.2847, "nll_loss": 0.5250944495201111, "rewards/accuracies": 1.0, "rewards/chosen": -0.12684041261672974, "rewards/margins": 0.051721446216106415, "rewards/rejected": -0.17856186628341675, "step": 743 }, { "epoch": 1.961766644693474, "grad_norm": 10.576794624328613, "learning_rate": 2.789707187222715e-06, "log_odds_chosen": 0.6424548029899597, "log_odds_ratio": -0.45932537317276, "logits/chosen": -1.0593125820159912, "logits/rejected": -1.040118932723999, "logps/chosen": -1.3602181673049927, "logps/rejected": -1.8843753337860107, "loss": 2.5558, "nll_loss": 0.5930271148681641, "rewards/accuracies": 0.875, "rewards/chosen": -0.13602182269096375, "rewards/margins": 0.052415721118450165, "rewards/rejected": -0.1884375661611557, "step": 744 }, { "epoch": 1.964403427818062, "grad_norm": 10.075294494628906, "learning_rate": 2.7826086956521736e-06, "log_odds_chosen": 0.5383829474449158, "log_odds_ratio": -0.46967270970344543, "logits/chosen": -1.1858677864074707, "logits/rejected": -1.0492582321166992, "logps/chosen": -1.3368380069732666, "logps/rejected": -1.7565494775772095, "loss": 2.3507, "nll_loss": 0.5407072901725769, "rewards/accuracies": 1.0, "rewards/chosen": -0.13368380069732666, "rewards/margins": 0.04197114706039429, "rewards/rejected": -0.17565494775772095, "step": 745 }, { "epoch": 1.96704021094265, "grad_norm": 10.174092292785645, "learning_rate": 2.7755102040816325e-06, "log_odds_chosen": 0.7807490229606628, "log_odds_ratio": -0.4044077694416046, "logits/chosen": -1.0873914957046509, "logits/rejected": -0.9772408604621887, "logps/chosen": -1.2904683351516724, "logps/rejected": -1.8890689611434937, "loss": 2.3492, "nll_loss": 0.5468517541885376, "rewards/accuracies": 1.0, "rewards/chosen": -0.12904684245586395, "rewards/margins": 0.05986006557941437, "rewards/rejected": -0.18890689313411713, "step": 746 }, { "epoch": 1.969676994067238, "grad_norm": 10.78231143951416, "learning_rate": 2.7684117125110914e-06, "log_odds_chosen": 0.29232144355773926, "log_odds_ratio": -0.5633444786071777, "logits/chosen": -1.1790039539337158, "logits/rejected": -1.0135250091552734, "logps/chosen": -1.4243428707122803, "logps/rejected": -1.6583789587020874, "loss": 2.8697, "nll_loss": 0.6611008644104004, "rewards/accuracies": 1.0, "rewards/chosen": -0.1424342840909958, "rewards/margins": 0.02340361848473549, "rewards/rejected": -0.16583789885044098, "step": 747 }, { "epoch": 1.972313777191826, "grad_norm": 10.888761520385742, "learning_rate": 2.7613132209405503e-06, "log_odds_chosen": 0.7005020380020142, "log_odds_ratio": -0.42932698130607605, "logits/chosen": -1.1515930891036987, "logits/rejected": -1.0647493600845337, "logps/chosen": -1.4700562953948975, "logps/rejected": -2.058032274246216, "loss": 2.4503, "nll_loss": 0.5696460008621216, "rewards/accuracies": 1.0, "rewards/chosen": -0.14700563251972198, "rewards/margins": 0.05879759415984154, "rewards/rejected": -0.20580321550369263, "step": 748 }, { "epoch": 1.974950560316414, "grad_norm": 10.145458221435547, "learning_rate": 2.7542147293700088e-06, "log_odds_chosen": 0.39581814408302307, "log_odds_ratio": -0.5312362909317017, "logits/chosen": -1.1222620010375977, "logits/rejected": -1.020433783531189, "logps/chosen": -1.45713472366333, "logps/rejected": -1.7835485935211182, "loss": 2.3973, "nll_loss": 0.5462045073509216, "rewards/accuracies": 0.875, "rewards/chosen": -0.14571347832679749, "rewards/margins": 0.03264138102531433, "rewards/rejected": -0.17835485935211182, "step": 749 }, { "epoch": 1.977587343441002, "grad_norm": 9.972173690795898, "learning_rate": 2.7471162377994677e-06, "log_odds_chosen": 0.5671351552009583, "log_odds_ratio": -0.49810683727264404, "logits/chosen": -1.018516182899475, "logits/rejected": -0.9507037401199341, "logps/chosen": -1.3662822246551514, "logps/rejected": -1.8222932815551758, "loss": 2.4143, "nll_loss": 0.5537749528884888, "rewards/accuracies": 0.75, "rewards/chosen": -0.13662822544574738, "rewards/margins": 0.045601099729537964, "rewards/rejected": -0.18222934007644653, "step": 750 }, { "epoch": 1.98022412656559, "grad_norm": 8.758198738098145, "learning_rate": 2.740017746228926e-06, "log_odds_chosen": 0.4473654627799988, "log_odds_ratio": -0.5166757106781006, "logits/chosen": -1.0452587604522705, "logits/rejected": -0.9860420823097229, "logps/chosen": -1.2896453142166138, "logps/rejected": -1.6250877380371094, "loss": 1.6328, "nll_loss": 0.35654035210609436, "rewards/accuracies": 1.0, "rewards/chosen": -0.12896452844142914, "rewards/margins": 0.03354424238204956, "rewards/rejected": -0.1625087708234787, "step": 751 }, { "epoch": 1.982860909690178, "grad_norm": 9.59146785736084, "learning_rate": 2.7329192546583846e-06, "log_odds_chosen": 0.3367576599121094, "log_odds_ratio": -0.5577321648597717, "logits/chosen": -1.1110894680023193, "logits/rejected": -1.0275874137878418, "logps/chosen": -1.2562785148620605, "logps/rejected": -1.5166959762573242, "loss": 2.2225, "nll_loss": 0.4998584985733032, "rewards/accuracies": 0.75, "rewards/chosen": -0.12562784552574158, "rewards/margins": 0.026041746139526367, "rewards/rejected": -0.15166959166526794, "step": 752 }, { "epoch": 1.985497692814766, "grad_norm": 11.306876182556152, "learning_rate": 2.7258207630878435e-06, "log_odds_chosen": 0.9240682721138, "log_odds_ratio": -0.45148539543151855, "logits/chosen": -1.0912346839904785, "logits/rejected": -0.9852017164230347, "logps/chosen": -1.3862550258636475, "logps/rejected": -2.1982877254486084, "loss": 2.8986, "nll_loss": 0.6795053482055664, "rewards/accuracies": 0.875, "rewards/chosen": -0.13862551748752594, "rewards/margins": 0.08120325952768326, "rewards/rejected": -0.2198287844657898, "step": 753 }, { "epoch": 1.988134475939354, "grad_norm": 9.992732048034668, "learning_rate": 2.7187222715173024e-06, "log_odds_chosen": 0.5552957057952881, "log_odds_ratio": -0.46775341033935547, "logits/chosen": -1.106242299079895, "logits/rejected": -0.9955162405967712, "logps/chosen": -1.2407417297363281, "logps/rejected": -1.6577999591827393, "loss": 2.0104, "nll_loss": 0.4558298885822296, "rewards/accuracies": 1.0, "rewards/chosen": -0.12407416850328445, "rewards/margins": 0.041705839335918427, "rewards/rejected": -0.16578000783920288, "step": 754 }, { "epoch": 1.990771259063942, "grad_norm": 9.361556053161621, "learning_rate": 2.7116237799467613e-06, "log_odds_chosen": 0.6962761878967285, "log_odds_ratio": -0.417205274105072, "logits/chosen": -1.049082636833191, "logits/rejected": -0.996416449546814, "logps/chosen": -1.2697757482528687, "logps/rejected": -1.797317624092102, "loss": 1.8086, "nll_loss": 0.4104408323764801, "rewards/accuracies": 1.0, "rewards/chosen": -0.1269775778055191, "rewards/margins": 0.052754178643226624, "rewards/rejected": -0.17973175644874573, "step": 755 }, { "epoch": 1.99340804218853, "grad_norm": 10.411174774169922, "learning_rate": 2.7045252883762198e-06, "log_odds_chosen": 0.3635842800140381, "log_odds_ratio": -0.5335854291915894, "logits/chosen": -1.0935719013214111, "logits/rejected": -1.046020269393921, "logps/chosen": -1.4086623191833496, "logps/rejected": -1.6860154867172241, "loss": 2.5162, "nll_loss": 0.5756828188896179, "rewards/accuracies": 1.0, "rewards/chosen": -0.1408662348985672, "rewards/margins": 0.027735330164432526, "rewards/rejected": -0.16860157251358032, "step": 756 }, { "epoch": 1.996044825313118, "grad_norm": 9.917908668518066, "learning_rate": 2.6974267968056787e-06, "log_odds_chosen": 0.6547459363937378, "log_odds_ratio": -0.44633957743644714, "logits/chosen": -1.0706005096435547, "logits/rejected": -1.034470558166504, "logps/chosen": -1.2188279628753662, "logps/rejected": -1.6781857013702393, "loss": 1.9883, "nll_loss": 0.452441543340683, "rewards/accuracies": 1.0, "rewards/chosen": -0.12188279628753662, "rewards/margins": 0.04593577980995178, "rewards/rejected": -0.1678185760974884, "step": 757 }, { "epoch": 1.998681608437706, "grad_norm": 10.383499145507812, "learning_rate": 2.6903283052351376e-06, "log_odds_chosen": 0.5008169412612915, "log_odds_ratio": -0.48359251022338867, "logits/chosen": -1.0675674676895142, "logits/rejected": -1.0341382026672363, "logps/chosen": -1.3880095481872559, "logps/rejected": -1.7721683979034424, "loss": 2.4269, "nll_loss": 0.5583770275115967, "rewards/accuracies": 1.0, "rewards/chosen": -0.1388009637594223, "rewards/margins": 0.03841589018702507, "rewards/rejected": -0.17721684277057648, "step": 758 }, { "epoch": 2.001318391562294, "grad_norm": 11.348942756652832, "learning_rate": 2.6832298136645965e-06, "log_odds_chosen": 0.8849374055862427, "log_odds_ratio": -0.4379063546657562, "logits/chosen": -1.1419169902801514, "logits/rejected": -1.0298004150390625, "logps/chosen": -1.3286932706832886, "logps/rejected": -2.0717482566833496, "loss": 2.2291, "nll_loss": 0.5134726166725159, "rewards/accuracies": 1.0, "rewards/chosen": -0.13286933302879333, "rewards/margins": 0.07430551201105118, "rewards/rejected": -0.20717483758926392, "step": 759 }, { "epoch": 2.0039551746868822, "grad_norm": 10.079428672790527, "learning_rate": 2.6761313220940545e-06, "log_odds_chosen": 0.5260939598083496, "log_odds_ratio": -0.47664958238601685, "logits/chosen": -1.209180235862732, "logits/rejected": -1.0874968767166138, "logps/chosen": -1.2297840118408203, "logps/rejected": -1.6314725875854492, "loss": 2.2452, "nll_loss": 0.513647198677063, "rewards/accuracies": 1.0, "rewards/chosen": -0.12297839671373367, "rewards/margins": 0.04016885906457901, "rewards/rejected": -0.16314725577831268, "step": 760 }, { "epoch": 2.00659195781147, "grad_norm": 10.240530967712402, "learning_rate": 2.6690328305235134e-06, "log_odds_chosen": 0.3144952356815338, "log_odds_ratio": -0.556167721748352, "logits/chosen": -1.161802053451538, "logits/rejected": -1.1026802062988281, "logps/chosen": -1.4659947156906128, "logps/rejected": -1.7172274589538574, "loss": 2.5276, "nll_loss": 0.5762755870819092, "rewards/accuracies": 0.875, "rewards/chosen": -0.14659947156906128, "rewards/margins": 0.025123273953795433, "rewards/rejected": -0.17172273993492126, "step": 761 }, { "epoch": 2.009228740936058, "grad_norm": 9.578766822814941, "learning_rate": 2.6619343389529723e-06, "log_odds_chosen": 0.7745237946510315, "log_odds_ratio": -0.44236063957214355, "logits/chosen": -1.0694057941436768, "logits/rejected": -0.9940791130065918, "logps/chosen": -1.2447141408920288, "logps/rejected": -1.8774677515029907, "loss": 2.1539, "nll_loss": 0.4942498803138733, "rewards/accuracies": 0.875, "rewards/chosen": -0.12447141110897064, "rewards/margins": 0.06327535212039948, "rewards/rejected": -0.1877467781305313, "step": 762 }, { "epoch": 2.0118655240606462, "grad_norm": 10.117440223693848, "learning_rate": 2.6548358473824312e-06, "log_odds_chosen": 0.538081169128418, "log_odds_ratio": -0.46689748764038086, "logits/chosen": -1.1605879068374634, "logits/rejected": -1.0427160263061523, "logps/chosen": -1.3132154941558838, "logps/rejected": -1.7294139862060547, "loss": 2.5959, "nll_loss": 0.6022971272468567, "rewards/accuracies": 1.0, "rewards/chosen": -0.13132154941558838, "rewards/margins": 0.04161985218524933, "rewards/rejected": -0.1729414016008377, "step": 763 }, { "epoch": 2.014502307185234, "grad_norm": 9.777716636657715, "learning_rate": 2.6477373558118897e-06, "log_odds_chosen": 0.5327706933021545, "log_odds_ratio": -0.49130064249038696, "logits/chosen": -1.1164560317993164, "logits/rejected": -1.0969555377960205, "logps/chosen": -1.237712025642395, "logps/rejected": -1.625910758972168, "loss": 2.266, "nll_loss": 0.5173711776733398, "rewards/accuracies": 0.875, "rewards/chosen": -0.12377119064331055, "rewards/margins": 0.03881988301873207, "rewards/rejected": -0.1625910848379135, "step": 764 }, { "epoch": 2.017139090309822, "grad_norm": 9.787508010864258, "learning_rate": 2.6406388642413486e-06, "log_odds_chosen": 0.736811101436615, "log_odds_ratio": -0.40617555379867554, "logits/chosen": -1.1186765432357788, "logits/rejected": -1.0302070379257202, "logps/chosen": -1.132691502571106, "logps/rejected": -1.656376838684082, "loss": 2.1621, "nll_loss": 0.49990198016166687, "rewards/accuracies": 0.875, "rewards/chosen": -0.11326915770769119, "rewards/margins": 0.052368536591529846, "rewards/rejected": -0.16563768684864044, "step": 765 }, { "epoch": 2.0197758734344102, "grad_norm": 10.189085960388184, "learning_rate": 2.6335403726708075e-06, "log_odds_chosen": 0.5936908721923828, "log_odds_ratio": -0.45090213418006897, "logits/chosen": -1.2093477249145508, "logits/rejected": -1.0756824016571045, "logps/chosen": -1.3594684600830078, "logps/rejected": -1.8339993953704834, "loss": 2.5355, "nll_loss": 0.5887949466705322, "rewards/accuracies": 1.0, "rewards/chosen": -0.1359468400478363, "rewards/margins": 0.047453105449676514, "rewards/rejected": -0.18339994549751282, "step": 766 }, { "epoch": 2.022412656558998, "grad_norm": 10.076580047607422, "learning_rate": 2.6264418811002664e-06, "log_odds_chosen": 0.513107180595398, "log_odds_ratio": -0.4888473451137543, "logits/chosen": -1.129032850265503, "logits/rejected": -1.0239429473876953, "logps/chosen": -1.4806454181671143, "logps/rejected": -1.900446891784668, "loss": 2.3663, "nll_loss": 0.5426843762397766, "rewards/accuracies": 1.0, "rewards/chosen": -0.14806455373764038, "rewards/margins": 0.04198013246059418, "rewards/rejected": -0.19004470109939575, "step": 767 }, { "epoch": 2.025049439683586, "grad_norm": 8.9835844039917, "learning_rate": 2.619343389529725e-06, "log_odds_chosen": 0.5582860112190247, "log_odds_ratio": -0.47621530294418335, "logits/chosen": -0.9895690083503723, "logits/rejected": -0.9377783536911011, "logps/chosen": -1.24507474899292, "logps/rejected": -1.6821479797363281, "loss": 1.5754, "nll_loss": 0.3462305963039398, "rewards/accuracies": 0.75, "rewards/chosen": -0.12450747936964035, "rewards/margins": 0.043707333505153656, "rewards/rejected": -0.168214812874794, "step": 768 }, { "epoch": 2.0276862228081742, "grad_norm": 10.730051040649414, "learning_rate": 2.6122448979591834e-06, "log_odds_chosen": 0.7082866430282593, "log_odds_ratio": -0.4379771947860718, "logits/chosen": -1.226049780845642, "logits/rejected": -1.099149465560913, "logps/chosen": -1.4317500591278076, "logps/rejected": -2.018101215362549, "loss": 2.9005, "nll_loss": 0.681334912776947, "rewards/accuracies": 0.875, "rewards/chosen": -0.14317500591278076, "rewards/margins": 0.05863512307405472, "rewards/rejected": -0.20181013643741608, "step": 769 }, { "epoch": 2.030323005932762, "grad_norm": 9.710260391235352, "learning_rate": 2.6051464063886423e-06, "log_odds_chosen": 0.770453929901123, "log_odds_ratio": -0.43126559257507324, "logits/chosen": -1.1383628845214844, "logits/rejected": -1.0302354097366333, "logps/chosen": -1.2499903440475464, "logps/rejected": -1.8305046558380127, "loss": 2.3788, "nll_loss": 0.5515839457511902, "rewards/accuracies": 1.0, "rewards/chosen": -0.1249990463256836, "rewards/margins": 0.05805141478776932, "rewards/rejected": -0.18305045366287231, "step": 770 }, { "epoch": 2.03295978905735, "grad_norm": 10.036596298217773, "learning_rate": 2.5980479148181007e-06, "log_odds_chosen": 0.6412467360496521, "log_odds_ratio": -0.46849995851516724, "logits/chosen": -1.131897211074829, "logits/rejected": -1.059012532234192, "logps/chosen": -1.3608183860778809, "logps/rejected": -1.8439494371414185, "loss": 2.6094, "nll_loss": 0.605492115020752, "rewards/accuracies": 0.75, "rewards/chosen": -0.13608182966709137, "rewards/margins": 0.048313114792108536, "rewards/rejected": -0.1843949556350708, "step": 771 }, { "epoch": 2.0355965721819382, "grad_norm": 10.127285957336426, "learning_rate": 2.5909494232475596e-06, "log_odds_chosen": 0.2889218330383301, "log_odds_ratio": -0.5826900601387024, "logits/chosen": -1.1097972393035889, "logits/rejected": -1.0736355781555176, "logps/chosen": -1.3271205425262451, "logps/rejected": -1.5551434755325317, "loss": 2.75, "nll_loss": 0.6292246580123901, "rewards/accuracies": 0.625, "rewards/chosen": -0.13271205127239227, "rewards/margins": 0.022802293300628662, "rewards/rejected": -0.15551434457302094, "step": 772 }, { "epoch": 2.038233355306526, "grad_norm": 9.093977928161621, "learning_rate": 2.5838509316770185e-06, "log_odds_chosen": 0.44692784547805786, "log_odds_ratio": -0.5036763548851013, "logits/chosen": -1.0858466625213623, "logits/rejected": -1.040008544921875, "logps/chosen": -1.080949306488037, "logps/rejected": -1.3944963216781616, "loss": 1.8039, "nll_loss": 0.40060439705848694, "rewards/accuracies": 1.0, "rewards/chosen": -0.10809493809938431, "rewards/margins": 0.031354695558547974, "rewards/rejected": -0.13944962620735168, "step": 773 }, { "epoch": 2.040870138431114, "grad_norm": 9.224924087524414, "learning_rate": 2.5767524401064774e-06, "log_odds_chosen": 0.7012994289398193, "log_odds_ratio": -0.41290491819381714, "logits/chosen": -1.1321061849594116, "logits/rejected": -1.0112411975860596, "logps/chosen": -1.133347749710083, "logps/rejected": -1.6516485214233398, "loss": 2.177, "nll_loss": 0.5029694437980652, "rewards/accuracies": 1.0, "rewards/chosen": -0.1133347749710083, "rewards/margins": 0.051830075681209564, "rewards/rejected": -0.16516485810279846, "step": 774 }, { "epoch": 2.0435069215557022, "grad_norm": 10.931916236877441, "learning_rate": 2.569653948535936e-06, "log_odds_chosen": 0.48683279752731323, "log_odds_ratio": -0.48779165744781494, "logits/chosen": -1.1395113468170166, "logits/rejected": -1.055709958076477, "logps/chosen": -1.5400993824005127, "logps/rejected": -1.9409468173980713, "loss": 2.6751, "nll_loss": 0.6199974417686462, "rewards/accuracies": 1.0, "rewards/chosen": -0.15400993824005127, "rewards/margins": 0.04008473455905914, "rewards/rejected": -0.1940946877002716, "step": 775 }, { "epoch": 2.04614370468029, "grad_norm": 10.797554969787598, "learning_rate": 2.562555456965395e-06, "log_odds_chosen": 0.42376700043678284, "log_odds_ratio": -0.5112159848213196, "logits/chosen": -1.163484811782837, "logits/rejected": -1.0718196630477905, "logps/chosen": -1.3019428253173828, "logps/rejected": -1.6327495574951172, "loss": 2.4008, "nll_loss": 0.5490672588348389, "rewards/accuracies": 1.0, "rewards/chosen": -0.1301942765712738, "rewards/margins": 0.03308069333434105, "rewards/rejected": -0.16327497363090515, "step": 776 }, { "epoch": 2.048780487804878, "grad_norm": 9.408236503601074, "learning_rate": 2.5554569653948537e-06, "log_odds_chosen": 0.6201867461204529, "log_odds_ratio": -0.4448419511318207, "logits/chosen": -1.1767107248306274, "logits/rejected": -1.074312686920166, "logps/chosen": -1.1322187185287476, "logps/rejected": -1.593127965927124, "loss": 2.1043, "nll_loss": 0.48158198595046997, "rewards/accuracies": 1.0, "rewards/chosen": -0.11322186887264252, "rewards/margins": 0.04609092324972153, "rewards/rejected": -0.15931278467178345, "step": 777 }, { "epoch": 2.0514172709294662, "grad_norm": 10.060628890991211, "learning_rate": 2.548358473824312e-06, "log_odds_chosen": 0.8383358716964722, "log_odds_ratio": -0.3872286081314087, "logits/chosen": -1.1026513576507568, "logits/rejected": -1.02262282371521, "logps/chosen": -1.244518756866455, "logps/rejected": -1.8661563396453857, "loss": 2.0012, "nll_loss": 0.4615873098373413, "rewards/accuracies": 1.0, "rewards/chosen": -0.12445187568664551, "rewards/margins": 0.06216376647353172, "rewards/rejected": -0.18661564588546753, "step": 778 }, { "epoch": 2.054054054054054, "grad_norm": 9.155142784118652, "learning_rate": 2.5412599822537707e-06, "log_odds_chosen": 0.38540011644363403, "log_odds_ratio": -0.5317520499229431, "logits/chosen": -1.0778992176055908, "logits/rejected": -1.0378587245941162, "logps/chosen": -1.3756157159805298, "logps/rejected": -1.6602983474731445, "loss": 2.061, "nll_loss": 0.4620826840400696, "rewards/accuracies": 0.875, "rewards/chosen": -0.13756157457828522, "rewards/margins": 0.02846825122833252, "rewards/rejected": -0.16602981090545654, "step": 779 }, { "epoch": 2.056690837178642, "grad_norm": 10.545321464538574, "learning_rate": 2.5341614906832296e-06, "log_odds_chosen": 0.505893349647522, "log_odds_ratio": -0.4876767694950104, "logits/chosen": -1.128211259841919, "logits/rejected": -0.9622944593429565, "logps/chosen": -1.4639647006988525, "logps/rejected": -1.8753548860549927, "loss": 2.6502, "nll_loss": 0.6137940883636475, "rewards/accuracies": 0.875, "rewards/chosen": -0.1463964730501175, "rewards/margins": 0.041139017790555954, "rewards/rejected": -0.18753549456596375, "step": 780 }, { "epoch": 2.0593276203032302, "grad_norm": 9.913485527038574, "learning_rate": 2.5270629991126885e-06, "log_odds_chosen": 0.3947891592979431, "log_odds_ratio": -0.5272966027259827, "logits/chosen": -1.2290658950805664, "logits/rejected": -1.1424353122711182, "logps/chosen": -1.2813048362731934, "logps/rejected": -1.5810402631759644, "loss": 2.6443, "nll_loss": 0.6083491444587708, "rewards/accuracies": 0.875, "rewards/chosen": -0.1281304806470871, "rewards/margins": 0.029973559081554413, "rewards/rejected": -0.1581040322780609, "step": 781 }, { "epoch": 2.061964403427818, "grad_norm": 9.621504783630371, "learning_rate": 2.5199645075421474e-06, "log_odds_chosen": 0.6991655826568604, "log_odds_ratio": -0.4380638003349304, "logits/chosen": -1.1538786888122559, "logits/rejected": -1.0285148620605469, "logps/chosen": -1.1817439794540405, "logps/rejected": -1.7146399021148682, "loss": 2.2452, "nll_loss": 0.517501175403595, "rewards/accuracies": 1.0, "rewards/chosen": -0.11817440390586853, "rewards/margins": 0.05328959599137306, "rewards/rejected": -0.1714639961719513, "step": 782 }, { "epoch": 2.064601186552406, "grad_norm": 11.298930168151855, "learning_rate": 2.512866015971606e-06, "log_odds_chosen": 0.5812134742736816, "log_odds_ratio": -0.45801496505737305, "logits/chosen": -1.2240318059921265, "logits/rejected": -1.0378308296203613, "logps/chosen": -1.3979883193969727, "logps/rejected": -1.8701411485671997, "loss": 2.7863, "nll_loss": 0.6507730484008789, "rewards/accuracies": 1.0, "rewards/chosen": -0.1397988349199295, "rewards/margins": 0.0472152978181839, "rewards/rejected": -0.1870141327381134, "step": 783 }, { "epoch": 2.0672379696769942, "grad_norm": 9.547139167785645, "learning_rate": 2.5057675244010647e-06, "log_odds_chosen": 0.5346148610115051, "log_odds_ratio": -0.4834967255592346, "logits/chosen": -1.0990629196166992, "logits/rejected": -1.0282251834869385, "logps/chosen": -1.0115312337875366, "logps/rejected": -1.3923074007034302, "loss": 1.8481, "nll_loss": 0.4136834144592285, "rewards/accuracies": 1.0, "rewards/chosen": -0.10115312039852142, "rewards/margins": 0.03807761147618294, "rewards/rejected": -0.13923074305057526, "step": 784 }, { "epoch": 2.069874752801582, "grad_norm": 10.128329277038574, "learning_rate": 2.4986690328305236e-06, "log_odds_chosen": 0.5243815779685974, "log_odds_ratio": -0.4750070571899414, "logits/chosen": -1.1562871932983398, "logits/rejected": -1.0403151512145996, "logps/chosen": -1.2300305366516113, "logps/rejected": -1.6397430896759033, "loss": 2.0847, "nll_loss": 0.4736851453781128, "rewards/accuracies": 1.0, "rewards/chosen": -0.1230030506849289, "rewards/margins": 0.040971267968416214, "rewards/rejected": -0.1639743149280548, "step": 785 }, { "epoch": 2.07251153592617, "grad_norm": 10.04702091217041, "learning_rate": 2.4915705412599825e-06, "log_odds_chosen": 0.7055188417434692, "log_odds_ratio": -0.4192762076854706, "logits/chosen": -1.1293883323669434, "logits/rejected": -1.0029202699661255, "logps/chosen": -1.264027714729309, "logps/rejected": -1.823844075202942, "loss": 2.281, "nll_loss": 0.5283329486846924, "rewards/accuracies": 1.0, "rewards/chosen": -0.12640278041362762, "rewards/margins": 0.05598163977265358, "rewards/rejected": -0.1823844015598297, "step": 786 }, { "epoch": 2.0751483190507582, "grad_norm": 9.414009094238281, "learning_rate": 2.4844720496894406e-06, "log_odds_chosen": 0.595037579536438, "log_odds_ratio": -0.46986937522888184, "logits/chosen": -1.0502935647964478, "logits/rejected": -1.0202876329421997, "logps/chosen": -1.0903228521347046, "logps/rejected": -1.5388803482055664, "loss": 1.7637, "nll_loss": 0.39393508434295654, "rewards/accuracies": 1.0, "rewards/chosen": -0.1090322881937027, "rewards/margins": 0.044855739921331406, "rewards/rejected": -0.1538880169391632, "step": 787 }, { "epoch": 2.077785102175346, "grad_norm": 9.77463436126709, "learning_rate": 2.4773735581188995e-06, "log_odds_chosen": 0.7229048013687134, "log_odds_ratio": -0.40499523282051086, "logits/chosen": -1.1099520921707153, "logits/rejected": -1.0251625776290894, "logps/chosen": -1.338236689567566, "logps/rejected": -1.9192970991134644, "loss": 2.149, "nll_loss": 0.4967554211616516, "rewards/accuracies": 1.0, "rewards/chosen": -0.1338236927986145, "rewards/margins": 0.058106038719415665, "rewards/rejected": -0.19192972779273987, "step": 788 }, { "epoch": 2.080421885299934, "grad_norm": 10.018221855163574, "learning_rate": 2.4702750665483584e-06, "log_odds_chosen": 0.557384192943573, "log_odds_ratio": -0.48614200949668884, "logits/chosen": -1.125726342201233, "logits/rejected": -1.0347040891647339, "logps/chosen": -1.2295360565185547, "logps/rejected": -1.650094747543335, "loss": 2.0944, "nll_loss": 0.4749777913093567, "rewards/accuracies": 0.75, "rewards/chosen": -0.1229536160826683, "rewards/margins": 0.04205586016178131, "rewards/rejected": -0.16500946879386902, "step": 789 }, { "epoch": 2.0830586684245223, "grad_norm": 9.642407417297363, "learning_rate": 2.463176574977817e-06, "log_odds_chosen": 0.6076935529708862, "log_odds_ratio": -0.4736708998680115, "logits/chosen": -1.0674395561218262, "logits/rejected": -1.0196499824523926, "logps/chosen": -1.3642213344573975, "logps/rejected": -1.8080679178237915, "loss": 2.0324, "nll_loss": 0.4607207179069519, "rewards/accuracies": 0.875, "rewards/chosen": -0.13642212748527527, "rewards/margins": 0.044384658336639404, "rewards/rejected": -0.18080680072307587, "step": 790 }, { "epoch": 2.08569545154911, "grad_norm": 9.645079612731934, "learning_rate": 2.4560780834072758e-06, "log_odds_chosen": 0.6035420894622803, "log_odds_ratio": -0.44714921712875366, "logits/chosen": -1.1527628898620605, "logits/rejected": -1.0415222644805908, "logps/chosen": -1.1864968538284302, "logps/rejected": -1.6468815803527832, "loss": 2.1985, "nll_loss": 0.504905104637146, "rewards/accuracies": 1.0, "rewards/chosen": -0.1186496764421463, "rewards/margins": 0.046038489788770676, "rewards/rejected": -0.16468816995620728, "step": 791 }, { "epoch": 2.088332234673698, "grad_norm": 10.855006217956543, "learning_rate": 2.4489795918367347e-06, "log_odds_chosen": 0.7123809456825256, "log_odds_ratio": -0.4443729519844055, "logits/chosen": -1.128336787223816, "logits/rejected": -0.983013391494751, "logps/chosen": -1.3530423641204834, "logps/rejected": -1.9418303966522217, "loss": 2.4777, "nll_loss": 0.5749930143356323, "rewards/accuracies": 1.0, "rewards/chosen": -0.13530424237251282, "rewards/margins": 0.05887877941131592, "rewards/rejected": -0.19418302178382874, "step": 792 }, { "epoch": 2.0909690177982863, "grad_norm": 10.273240089416504, "learning_rate": 2.4418811002661936e-06, "log_odds_chosen": 0.6091665029525757, "log_odds_ratio": -0.4456428587436676, "logits/chosen": -1.0923587083816528, "logits/rejected": -0.965667188167572, "logps/chosen": -1.3967857360839844, "logps/rejected": -1.8644620180130005, "loss": 2.2365, "nll_loss": 0.5145605802536011, "rewards/accuracies": 1.0, "rewards/chosen": -0.13967856764793396, "rewards/margins": 0.04676762968301773, "rewards/rejected": -0.1864461898803711, "step": 793 }, { "epoch": 2.093605800922874, "grad_norm": 9.252375602722168, "learning_rate": 2.434782608695652e-06, "log_odds_chosen": 0.5784919857978821, "log_odds_ratio": -0.4597025215625763, "logits/chosen": -1.0548346042633057, "logits/rejected": -1.0105911493301392, "logps/chosen": -1.1711606979370117, "logps/rejected": -1.5927342176437378, "loss": 1.7621, "nll_loss": 0.3945651650428772, "rewards/accuracies": 1.0, "rewards/chosen": -0.11711607873439789, "rewards/margins": 0.04215734824538231, "rewards/rejected": -0.1592734307050705, "step": 794 }, { "epoch": 2.096242584047462, "grad_norm": 9.470747947692871, "learning_rate": 2.427684117125111e-06, "log_odds_chosen": 0.7224401235580444, "log_odds_ratio": -0.42393580079078674, "logits/chosen": -1.1122452020645142, "logits/rejected": -1.0029590129852295, "logps/chosen": -1.26948881149292, "logps/rejected": -1.857187271118164, "loss": 2.2675, "nll_loss": 0.5244921445846558, "rewards/accuracies": 1.0, "rewards/chosen": -0.12694887816905975, "rewards/margins": 0.05876985937356949, "rewards/rejected": -0.18571873009204865, "step": 795 }, { "epoch": 2.0988793671720503, "grad_norm": 9.463521003723145, "learning_rate": 2.4205856255545694e-06, "log_odds_chosen": 1.0167723894119263, "log_odds_ratio": -0.3493209481239319, "logits/chosen": -1.101204752922058, "logits/rejected": -1.0462161302566528, "logps/chosen": -0.9706986546516418, "logps/rejected": -1.6190752983093262, "loss": 1.6351, "nll_loss": 0.3738459348678589, "rewards/accuracies": 1.0, "rewards/chosen": -0.0970698744058609, "rewards/margins": 0.06483766436576843, "rewards/rejected": -0.16190752387046814, "step": 796 }, { "epoch": 2.101516150296638, "grad_norm": 9.539721488952637, "learning_rate": 2.4134871339840283e-06, "log_odds_chosen": 0.49127352237701416, "log_odds_ratio": -0.4973955452442169, "logits/chosen": -1.0409544706344604, "logits/rejected": -1.0238300561904907, "logps/chosen": -1.3186019659042358, "logps/rejected": -1.69057035446167, "loss": 2.0852, "nll_loss": 0.4715545177459717, "rewards/accuracies": 1.0, "rewards/chosen": -0.13186019659042358, "rewards/margins": 0.03719683736562729, "rewards/rejected": -0.16905704140663147, "step": 797 }, { "epoch": 2.104152933421226, "grad_norm": 10.430363655090332, "learning_rate": 2.406388642413487e-06, "log_odds_chosen": 0.2506150007247925, "log_odds_ratio": -0.5804030895233154, "logits/chosen": -1.1437674760818481, "logits/rejected": -1.1276366710662842, "logps/chosen": -1.5441486835479736, "logps/rejected": -1.736472487449646, "loss": 2.6284, "nll_loss": 0.5990653038024902, "rewards/accuracies": 0.875, "rewards/chosen": -0.15441486239433289, "rewards/margins": 0.019232388585805893, "rewards/rejected": -0.17364725470542908, "step": 798 }, { "epoch": 2.1067897165458143, "grad_norm": 9.204401016235352, "learning_rate": 2.3992901508429457e-06, "log_odds_chosen": 0.9071800708770752, "log_odds_ratio": -0.3889234960079193, "logits/chosen": -1.079196572303772, "logits/rejected": -0.9312431812286377, "logps/chosen": -1.2495801448822021, "logps/rejected": -1.9906275272369385, "loss": 1.8734, "nll_loss": 0.4294639229774475, "rewards/accuracies": 1.0, "rewards/chosen": -0.12495802342891693, "rewards/margins": 0.07410473376512527, "rewards/rejected": -0.1990627497434616, "step": 799 }, { "epoch": 2.109426499670402, "grad_norm": 10.068049430847168, "learning_rate": 2.3921916592724046e-06, "log_odds_chosen": 0.5332250595092773, "log_odds_ratio": -0.48249611258506775, "logits/chosen": -1.14569091796875, "logits/rejected": -1.0281379222869873, "logps/chosen": -1.500299334526062, "logps/rejected": -1.9449788331985474, "loss": 2.7113, "nll_loss": 0.6295745968818665, "rewards/accuracies": 0.875, "rewards/chosen": -0.15002992749214172, "rewards/margins": 0.04446795582771301, "rewards/rejected": -0.19449788331985474, "step": 800 }, { "epoch": 2.11206328279499, "grad_norm": 10.614519119262695, "learning_rate": 2.3850931677018635e-06, "log_odds_chosen": 0.5133019089698792, "log_odds_ratio": -0.48530033230781555, "logits/chosen": -1.0950928926467896, "logits/rejected": -0.9656269550323486, "logps/chosen": -1.5714972019195557, "logps/rejected": -2.00163197517395, "loss": 2.49, "nll_loss": 0.5739596486091614, "rewards/accuracies": 0.875, "rewards/chosen": -0.15714971721172333, "rewards/margins": 0.04301348328590393, "rewards/rejected": -0.20016320049762726, "step": 801 }, { "epoch": 2.1147000659195783, "grad_norm": 8.792914390563965, "learning_rate": 2.377994676131322e-06, "log_odds_chosen": 0.7065452337265015, "log_odds_ratio": -0.4272323250770569, "logits/chosen": -1.0454217195510864, "logits/rejected": -0.9867510199546814, "logps/chosen": -1.188128113746643, "logps/rejected": -1.7429618835449219, "loss": 1.7372, "nll_loss": 0.39158543944358826, "rewards/accuracies": 1.0, "rewards/chosen": -0.11881281435489655, "rewards/margins": 0.055483367294073105, "rewards/rejected": -0.17429618537425995, "step": 802 }, { "epoch": 2.117336849044166, "grad_norm": 9.577733039855957, "learning_rate": 2.370896184560781e-06, "log_odds_chosen": 0.41758400201797485, "log_odds_ratio": -0.5160441398620605, "logits/chosen": -1.1367295980453491, "logits/rejected": -1.0775389671325684, "logps/chosen": -1.2276097536087036, "logps/rejected": -1.538201093673706, "loss": 2.2566, "nll_loss": 0.5125335454940796, "rewards/accuracies": 1.0, "rewards/chosen": -0.12276098132133484, "rewards/margins": 0.031059138476848602, "rewards/rejected": -0.15382012724876404, "step": 803 }, { "epoch": 2.119973632168754, "grad_norm": 9.7593412399292, "learning_rate": 2.3637976929902398e-06, "log_odds_chosen": 0.5125753283500671, "log_odds_ratio": -0.48863285779953003, "logits/chosen": -1.089426875114441, "logits/rejected": -1.052983283996582, "logps/chosen": -1.442460298538208, "logps/rejected": -1.8569387197494507, "loss": 2.1403, "nll_loss": 0.4862205684185028, "rewards/accuracies": 1.0, "rewards/chosen": -0.14424604177474976, "rewards/margins": 0.04144783690571785, "rewards/rejected": -0.1856938600540161, "step": 804 }, { "epoch": 2.1226104152933423, "grad_norm": 9.3715181350708, "learning_rate": 2.356699201419698e-06, "log_odds_chosen": 0.4354270398616791, "log_odds_ratio": -0.514744758605957, "logits/chosen": -1.085614562034607, "logits/rejected": -1.0473663806915283, "logps/chosen": -1.2321531772613525, "logps/rejected": -1.5729819536209106, "loss": 2.0199, "nll_loss": 0.4535001516342163, "rewards/accuracies": 1.0, "rewards/chosen": -0.12321531772613525, "rewards/margins": 0.034082889556884766, "rewards/rejected": -0.15729820728302002, "step": 805 }, { "epoch": 2.12524719841793, "grad_norm": 10.585258483886719, "learning_rate": 2.3496007098491567e-06, "log_odds_chosen": 0.5048158764839172, "log_odds_ratio": -0.48133382201194763, "logits/chosen": -1.141870379447937, "logits/rejected": -1.0114864110946655, "logps/chosen": -1.4730051755905151, "logps/rejected": -1.8799927234649658, "loss": 2.6892, "nll_loss": 0.6241719722747803, "rewards/accuracies": 1.0, "rewards/chosen": -0.14730052649974823, "rewards/margins": 0.04069875553250313, "rewards/rejected": -0.18799926340579987, "step": 806 }, { "epoch": 2.127883981542518, "grad_norm": 9.937313079833984, "learning_rate": 2.3425022182786156e-06, "log_odds_chosen": 0.7905609011650085, "log_odds_ratio": -0.4188607335090637, "logits/chosen": -1.0846725702285767, "logits/rejected": -1.025403618812561, "logps/chosen": -1.1108648777008057, "logps/rejected": -1.6417710781097412, "loss": 1.7662, "nll_loss": 0.3996736705303192, "rewards/accuracies": 1.0, "rewards/chosen": -0.11108650267124176, "rewards/margins": 0.05309060961008072, "rewards/rejected": -0.16417710483074188, "step": 807 }, { "epoch": 2.1305207646671063, "grad_norm": 9.610738754272461, "learning_rate": 2.3354037267080745e-06, "log_odds_chosen": 0.5842036008834839, "log_odds_ratio": -0.4494381248950958, "logits/chosen": -1.0718939304351807, "logits/rejected": -0.9993994235992432, "logps/chosen": -1.1714190244674683, "logps/rejected": -1.5994045734405518, "loss": 1.7764, "nll_loss": 0.39915409684181213, "rewards/accuracies": 1.0, "rewards/chosen": -0.11714190989732742, "rewards/margins": 0.04279854893684387, "rewards/rejected": -0.1599404513835907, "step": 808 }, { "epoch": 2.133157547791694, "grad_norm": 9.702584266662598, "learning_rate": 2.328305235137533e-06, "log_odds_chosen": 0.8238008618354797, "log_odds_ratio": -0.39040911197662354, "logits/chosen": -1.0546904802322388, "logits/rejected": -1.0126965045928955, "logps/chosen": -1.2540767192840576, "logps/rejected": -1.9113447666168213, "loss": 2.0101, "nll_loss": 0.4634771943092346, "rewards/accuracies": 0.875, "rewards/chosen": -0.12540766596794128, "rewards/margins": 0.06572680175304413, "rewards/rejected": -0.1911344677209854, "step": 809 }, { "epoch": 2.135794330916282, "grad_norm": 10.387869834899902, "learning_rate": 2.321206743566992e-06, "log_odds_chosen": 0.2536086142063141, "log_odds_ratio": -0.5774084329605103, "logits/chosen": -1.2409507036209106, "logits/rejected": -1.1530442237854004, "logps/chosen": -1.430117130279541, "logps/rejected": -1.633103370666504, "loss": 2.8274, "nll_loss": 0.649109959602356, "rewards/accuracies": 0.875, "rewards/chosen": -0.14301171898841858, "rewards/margins": 0.020298613235354424, "rewards/rejected": -0.16331034898757935, "step": 810 }, { "epoch": 2.1384311140408703, "grad_norm": 9.708829879760742, "learning_rate": 2.314108251996451e-06, "log_odds_chosen": 0.5131632685661316, "log_odds_ratio": -0.48157426714897156, "logits/chosen": -1.1093146800994873, "logits/rejected": -1.0246758460998535, "logps/chosen": -1.308085560798645, "logps/rejected": -1.709665298461914, "loss": 2.0433, "nll_loss": 0.46267786622047424, "rewards/accuracies": 0.875, "rewards/chosen": -0.13080856204032898, "rewards/margins": 0.040157970041036606, "rewards/rejected": -0.17096653580665588, "step": 811 }, { "epoch": 2.141067897165458, "grad_norm": 10.519964218139648, "learning_rate": 2.3070097604259097e-06, "log_odds_chosen": 0.7052963376045227, "log_odds_ratio": -0.42012089490890503, "logits/chosen": -1.0860507488250732, "logits/rejected": -1.0053993463516235, "logps/chosen": -1.4012532234191895, "logps/rejected": -1.9778465032577515, "loss": 2.2422, "nll_loss": 0.5185346603393555, "rewards/accuracies": 1.0, "rewards/chosen": -0.1401253342628479, "rewards/margins": 0.057659320533275604, "rewards/rejected": -0.1977846622467041, "step": 812 }, { "epoch": 2.143704680290046, "grad_norm": 9.929567337036133, "learning_rate": 2.299911268855368e-06, "log_odds_chosen": 0.5145715475082397, "log_odds_ratio": -0.5079393982887268, "logits/chosen": -1.0435419082641602, "logits/rejected": -0.9938980340957642, "logps/chosen": -1.2124900817871094, "logps/rejected": -1.5268044471740723, "loss": 2.3445, "nll_loss": 0.5353326797485352, "rewards/accuracies": 0.75, "rewards/chosen": -0.1212489977478981, "rewards/margins": 0.03143144026398659, "rewards/rejected": -0.152680441737175, "step": 813 }, { "epoch": 2.1463414634146343, "grad_norm": 9.384446144104004, "learning_rate": 2.2928127772848267e-06, "log_odds_chosen": 0.7435194849967957, "log_odds_ratio": -0.42445817589759827, "logits/chosen": -1.0636303424835205, "logits/rejected": -0.9686412215232849, "logps/chosen": -1.2277225255966187, "logps/rejected": -1.7723865509033203, "loss": 1.9616, "nll_loss": 0.44795966148376465, "rewards/accuracies": 1.0, "rewards/chosen": -0.12277225404977798, "rewards/margins": 0.05446639657020569, "rewards/rejected": -0.17723865807056427, "step": 814 }, { "epoch": 2.148978246539222, "grad_norm": 10.526111602783203, "learning_rate": 2.2857142857142856e-06, "log_odds_chosen": 0.43695545196533203, "log_odds_ratio": -0.5100619196891785, "logits/chosen": -1.0403656959533691, "logits/rejected": -0.9722212553024292, "logps/chosen": -1.3435180187225342, "logps/rejected": -1.6733289957046509, "loss": 2.039, "nll_loss": 0.45873549580574036, "rewards/accuracies": 0.875, "rewards/chosen": -0.13435180485248566, "rewards/margins": 0.03298109024763107, "rewards/rejected": -0.16733288764953613, "step": 815 }, { "epoch": 2.15161502966381, "grad_norm": 8.918171882629395, "learning_rate": 2.2786157941437445e-06, "log_odds_chosen": 0.5507664084434509, "log_odds_ratio": -0.47093725204467773, "logits/chosen": -1.1360986232757568, "logits/rejected": -1.0540223121643066, "logps/chosen": -1.1695194244384766, "logps/rejected": -1.5818474292755127, "loss": 1.8886, "nll_loss": 0.42505964636802673, "rewards/accuracies": 1.0, "rewards/chosen": -0.11695195734500885, "rewards/margins": 0.04123278334736824, "rewards/rejected": -0.1581847369670868, "step": 816 }, { "epoch": 2.1542518127883983, "grad_norm": 10.905034065246582, "learning_rate": 2.271517302573203e-06, "log_odds_chosen": 0.6513671875, "log_odds_ratio": -0.4349040389060974, "logits/chosen": -1.0883439779281616, "logits/rejected": -0.9979703426361084, "logps/chosen": -1.441659688949585, "logps/rejected": -1.9642170667648315, "loss": 2.4105, "nll_loss": 0.5591432452201843, "rewards/accuracies": 1.0, "rewards/chosen": -0.1441659778356552, "rewards/margins": 0.05225573480129242, "rewards/rejected": -0.19642171263694763, "step": 817 }, { "epoch": 2.156888595912986, "grad_norm": 9.421453475952148, "learning_rate": 2.264418811002662e-06, "log_odds_chosen": 0.3477077782154083, "log_odds_ratio": -0.5469948649406433, "logits/chosen": -1.109951138496399, "logits/rejected": -1.0714219808578491, "logps/chosen": -1.2214250564575195, "logps/rejected": -1.4891982078552246, "loss": 1.9733, "nll_loss": 0.43863645195961, "rewards/accuracies": 0.875, "rewards/chosen": -0.12214250862598419, "rewards/margins": 0.026777319610118866, "rewards/rejected": -0.14891982078552246, "step": 818 }, { "epoch": 2.159525379037574, "grad_norm": 10.353071212768555, "learning_rate": 2.2573203194321207e-06, "log_odds_chosen": 0.5227789878845215, "log_odds_ratio": -0.4777028560638428, "logits/chosen": -1.155128836631775, "logits/rejected": -1.083472728729248, "logps/chosen": -1.2611448764801025, "logps/rejected": -1.655976414680481, "loss": 2.5178, "nll_loss": 0.5816807746887207, "rewards/accuracies": 0.875, "rewards/chosen": -0.12611448764801025, "rewards/margins": 0.03948315605521202, "rewards/rejected": -0.16559764742851257, "step": 819 }, { "epoch": 2.1621621621621623, "grad_norm": 10.612918853759766, "learning_rate": 2.2502218278615796e-06, "log_odds_chosen": 0.325747549533844, "log_odds_ratio": -0.5508489608764648, "logits/chosen": -1.2103018760681152, "logits/rejected": -1.083714485168457, "logps/chosen": -1.396499752998352, "logps/rejected": -1.6515552997589111, "loss": 2.6947, "nll_loss": 0.6185782551765442, "rewards/accuracies": 1.0, "rewards/chosen": -0.13964997231960297, "rewards/margins": 0.025505557656288147, "rewards/rejected": -0.1651555299758911, "step": 820 }, { "epoch": 2.16479894528675, "grad_norm": 9.557706832885742, "learning_rate": 2.243123336291038e-06, "log_odds_chosen": 0.48723679780960083, "log_odds_ratio": -0.5050497055053711, "logits/chosen": -1.0607335567474365, "logits/rejected": -1.0438419580459595, "logps/chosen": -1.180483341217041, "logps/rejected": -1.550545573234558, "loss": 1.8689, "nll_loss": 0.4167235195636749, "rewards/accuracies": 1.0, "rewards/chosen": -0.11804834008216858, "rewards/margins": 0.037006210535764694, "rewards/rejected": -0.15505453944206238, "step": 821 }, { "epoch": 2.167435728411338, "grad_norm": 10.278383255004883, "learning_rate": 2.2360248447204966e-06, "log_odds_chosen": 0.4420629143714905, "log_odds_ratio": -0.5143228769302368, "logits/chosen": -1.0987699031829834, "logits/rejected": -1.0278682708740234, "logps/chosen": -1.3499083518981934, "logps/rejected": -1.6710383892059326, "loss": 2.7196, "nll_loss": 0.628455400466919, "rewards/accuracies": 0.75, "rewards/chosen": -0.13499082624912262, "rewards/margins": 0.032113008201122284, "rewards/rejected": -0.1671038269996643, "step": 822 }, { "epoch": 2.1700725115359263, "grad_norm": 8.535082817077637, "learning_rate": 2.2289263531499555e-06, "log_odds_chosen": 1.006068229675293, "log_odds_ratio": -0.3642235994338989, "logits/chosen": -1.0344353914260864, "logits/rejected": -0.9348142147064209, "logps/chosen": -1.1169939041137695, "logps/rejected": -1.9077107906341553, "loss": 1.5286, "nll_loss": 0.34572744369506836, "rewards/accuracies": 1.0, "rewards/chosen": -0.11169938743114471, "rewards/margins": 0.07907170057296753, "rewards/rejected": -0.19077108800411224, "step": 823 }, { "epoch": 2.172709294660514, "grad_norm": 10.125965118408203, "learning_rate": 2.221827861579414e-06, "log_odds_chosen": 0.43210938572883606, "log_odds_ratio": -0.517227292060852, "logits/chosen": -1.0547758340835571, "logits/rejected": -0.9629392027854919, "logps/chosen": -1.4742023944854736, "logps/rejected": -1.8478717803955078, "loss": 2.1903, "nll_loss": 0.4958563446998596, "rewards/accuracies": 0.75, "rewards/chosen": -0.1474202275276184, "rewards/margins": 0.03736693412065506, "rewards/rejected": -0.18478718400001526, "step": 824 }, { "epoch": 2.175346077785102, "grad_norm": 9.980408668518066, "learning_rate": 2.214729370008873e-06, "log_odds_chosen": 0.4883590042591095, "log_odds_ratio": -0.4976821541786194, "logits/chosen": -1.1093132495880127, "logits/rejected": -1.004177212715149, "logps/chosen": -1.3356246948242188, "logps/rejected": -1.7281272411346436, "loss": 2.12, "nll_loss": 0.4802260398864746, "rewards/accuracies": 0.875, "rewards/chosen": -0.13356247544288635, "rewards/margins": 0.03925025463104248, "rewards/rejected": -0.17281271517276764, "step": 825 }, { "epoch": 2.1779828609096903, "grad_norm": 10.402122497558594, "learning_rate": 2.2076308784383318e-06, "log_odds_chosen": 0.36031287908554077, "log_odds_ratio": -0.5396561622619629, "logits/chosen": -1.1205006837844849, "logits/rejected": -1.0499624013900757, "logps/chosen": -1.4112681150436401, "logps/rejected": -1.6909284591674805, "loss": 2.3688, "nll_loss": 0.5382289886474609, "rewards/accuracies": 0.875, "rewards/chosen": -0.141126811504364, "rewards/margins": 0.02796604298055172, "rewards/rejected": -0.16909286379814148, "step": 826 }, { "epoch": 2.180619644034278, "grad_norm": 9.647673606872559, "learning_rate": 2.2005323868677907e-06, "log_odds_chosen": 0.5771644711494446, "log_odds_ratio": -0.45836514234542847, "logits/chosen": -1.152066707611084, "logits/rejected": -1.0163257122039795, "logps/chosen": -1.243578553199768, "logps/rejected": -1.6959649324417114, "loss": 2.3424, "nll_loss": 0.5397616028785706, "rewards/accuracies": 1.0, "rewards/chosen": -0.12435785681009293, "rewards/margins": 0.04523865133523941, "rewards/rejected": -0.16959650814533234, "step": 827 }, { "epoch": 2.183256427158866, "grad_norm": 9.848581314086914, "learning_rate": 2.193433895297249e-06, "log_odds_chosen": 0.2979162633419037, "log_odds_ratio": -0.5617325305938721, "logits/chosen": -1.088724136352539, "logits/rejected": -1.0479118824005127, "logps/chosen": -1.2935439348220825, "logps/rejected": -1.5216472148895264, "loss": 2.18, "nll_loss": 0.48883068561553955, "rewards/accuracies": 0.75, "rewards/chosen": -0.12935440242290497, "rewards/margins": 0.022810325026512146, "rewards/rejected": -0.15216472744941711, "step": 828 }, { "epoch": 2.1858932102834543, "grad_norm": 9.815072059631348, "learning_rate": 2.186335403726708e-06, "log_odds_chosen": 0.5657855868339539, "log_odds_ratio": -0.45759493112564087, "logits/chosen": -1.0358961820602417, "logits/rejected": -0.9519480466842651, "logps/chosen": -1.3682363033294678, "logps/rejected": -1.8163623809814453, "loss": 1.9967, "nll_loss": 0.45340484380722046, "rewards/accuracies": 1.0, "rewards/chosen": -0.1368236392736435, "rewards/margins": 0.044812608510255814, "rewards/rejected": -0.181636244058609, "step": 829 }, { "epoch": 2.188529993408042, "grad_norm": 9.674201011657715, "learning_rate": 2.179236912156167e-06, "log_odds_chosen": 0.8280074596405029, "log_odds_ratio": -0.4154477119445801, "logits/chosen": -1.0819666385650635, "logits/rejected": -0.9875413775444031, "logps/chosen": -1.0795493125915527, "logps/rejected": -1.6378755569458008, "loss": 1.9321, "nll_loss": 0.44148778915405273, "rewards/accuracies": 1.0, "rewards/chosen": -0.10795491933822632, "rewards/margins": 0.0558326430618763, "rewards/rejected": -0.16378755867481232, "step": 830 }, { "epoch": 2.19116677653263, "grad_norm": 9.76904582977295, "learning_rate": 2.1721384205856254e-06, "log_odds_chosen": 0.4637497067451477, "log_odds_ratio": -0.4947831332683563, "logits/chosen": -1.0782703161239624, "logits/rejected": -1.0121512413024902, "logps/chosen": -1.4170691967010498, "logps/rejected": -1.7891721725463867, "loss": 2.1634, "nll_loss": 0.49136391282081604, "rewards/accuracies": 1.0, "rewards/chosen": -0.1417069137096405, "rewards/margins": 0.03721030056476593, "rewards/rejected": -0.17891722917556763, "step": 831 }, { "epoch": 2.1938035596572183, "grad_norm": 10.08104133605957, "learning_rate": 2.165039929015084e-06, "log_odds_chosen": 0.4065963625907898, "log_odds_ratio": -0.5492662787437439, "logits/chosen": -1.1009379625320435, "logits/rejected": -1.0309967994689941, "logps/chosen": -1.2967877388000488, "logps/rejected": -1.6170520782470703, "loss": 2.6675, "nll_loss": 0.611954927444458, "rewards/accuracies": 0.875, "rewards/chosen": -0.12967877089977264, "rewards/margins": 0.032026439905166626, "rewards/rejected": -0.16170522570610046, "step": 832 }, { "epoch": 2.196440342781806, "grad_norm": 9.883426666259766, "learning_rate": 2.1579414374445428e-06, "log_odds_chosen": 0.33256006240844727, "log_odds_ratio": -0.5476436614990234, "logits/chosen": -1.1163049936294556, "logits/rejected": -1.0105125904083252, "logps/chosen": -1.2677969932556152, "logps/rejected": -1.515932559967041, "loss": 2.0921, "nll_loss": 0.4682601988315582, "rewards/accuracies": 1.0, "rewards/chosen": -0.126779705286026, "rewards/margins": 0.024813562631607056, "rewards/rejected": -0.15159326791763306, "step": 833 }, { "epoch": 2.199077125906394, "grad_norm": 9.637345314025879, "learning_rate": 2.1508429458740017e-06, "log_odds_chosen": 0.7757169008255005, "log_odds_ratio": -0.43987080454826355, "logits/chosen": -1.0220355987548828, "logits/rejected": -0.9482641816139221, "logps/chosen": -1.1861026287078857, "logps/rejected": -1.8005510568618774, "loss": 1.7984, "nll_loss": 0.40560632944107056, "rewards/accuracies": 0.875, "rewards/chosen": -0.11861026287078857, "rewards/margins": 0.061444856226444244, "rewards/rejected": -0.18005511164665222, "step": 834 }, { "epoch": 2.2017139090309823, "grad_norm": 9.93033504486084, "learning_rate": 2.1437444543034606e-06, "log_odds_chosen": 0.26072263717651367, "log_odds_ratio": -0.5927022695541382, "logits/chosen": -1.0605559349060059, "logits/rejected": -0.9693813323974609, "logps/chosen": -1.387969970703125, "logps/rejected": -1.5906848907470703, "loss": 2.1978, "nll_loss": 0.49017882347106934, "rewards/accuracies": 0.75, "rewards/chosen": -0.13879700005054474, "rewards/margins": 0.02027149498462677, "rewards/rejected": -0.1590684950351715, "step": 835 }, { "epoch": 2.20435069215557, "grad_norm": 9.383538246154785, "learning_rate": 2.136645962732919e-06, "log_odds_chosen": 0.8809219598770142, "log_odds_ratio": -0.3742610514163971, "logits/chosen": -1.103813886642456, "logits/rejected": -0.965684711933136, "logps/chosen": -1.2720234394073486, "logps/rejected": -1.9837875366210938, "loss": 1.8789, "nll_loss": 0.43230006098747253, "rewards/accuracies": 1.0, "rewards/chosen": -0.1272023618221283, "rewards/margins": 0.07117639482021332, "rewards/rejected": -0.1983787566423416, "step": 836 }, { "epoch": 2.206987475280158, "grad_norm": 9.359567642211914, "learning_rate": 2.129547471162378e-06, "log_odds_chosen": 0.563247561454773, "log_odds_ratio": -0.4659076929092407, "logits/chosen": -1.081076979637146, "logits/rejected": -1.0120054483413696, "logps/chosen": -1.1683335304260254, "logps/rejected": -1.5994642972946167, "loss": 1.9513, "nll_loss": 0.4412320852279663, "rewards/accuracies": 0.875, "rewards/chosen": -0.11683335900306702, "rewards/margins": 0.04311307519674301, "rewards/rejected": -0.15994644165039062, "step": 837 }, { "epoch": 2.2096242584047463, "grad_norm": 10.473090171813965, "learning_rate": 2.122448979591837e-06, "log_odds_chosen": 0.6541207432746887, "log_odds_ratio": -0.4413262903690338, "logits/chosen": -1.2150276899337769, "logits/rejected": -1.0857141017913818, "logps/chosen": -1.156285285949707, "logps/rejected": -1.643873691558838, "loss": 2.2774, "nll_loss": 0.5252097845077515, "rewards/accuracies": 0.875, "rewards/chosen": -0.11562854051589966, "rewards/margins": 0.04875882714986801, "rewards/rejected": -0.16438736021518707, "step": 838 }, { "epoch": 2.212261041529334, "grad_norm": 9.396411895751953, "learning_rate": 2.1153504880212958e-06, "log_odds_chosen": 0.7791802883148193, "log_odds_ratio": -0.41297250986099243, "logits/chosen": -1.1654194593429565, "logits/rejected": -1.0504658222198486, "logps/chosen": -1.3216980695724487, "logps/rejected": -1.9628183841705322, "loss": 2.2329, "nll_loss": 0.5169225931167603, "rewards/accuracies": 1.0, "rewards/chosen": -0.13216981291770935, "rewards/margins": 0.06411202251911163, "rewards/rejected": -0.19628183543682098, "step": 839 }, { "epoch": 2.214897824653922, "grad_norm": 10.38984203338623, "learning_rate": 2.108251996450754e-06, "log_odds_chosen": 0.28098800778388977, "log_odds_ratio": -0.5779528617858887, "logits/chosen": -1.0790941715240479, "logits/rejected": -1.0334962606430054, "logps/chosen": -1.303661584854126, "logps/rejected": -1.5208593606948853, "loss": 2.2238, "nll_loss": 0.4981459379196167, "rewards/accuracies": 0.625, "rewards/chosen": -0.13036617636680603, "rewards/margins": 0.02171977609395981, "rewards/rejected": -0.15208593010902405, "step": 840 }, { "epoch": 2.2175346077785103, "grad_norm": 10.593436241149902, "learning_rate": 2.1011535048802127e-06, "log_odds_chosen": 0.5783386826515198, "log_odds_ratio": -0.46727651357650757, "logits/chosen": -1.0302200317382812, "logits/rejected": -0.975492537021637, "logps/chosen": -1.274944543838501, "logps/rejected": -1.7174361944198608, "loss": 2.1627, "nll_loss": 0.4939413070678711, "rewards/accuracies": 0.875, "rewards/chosen": -0.1274944692850113, "rewards/margins": 0.044249165803194046, "rewards/rejected": -0.17174363136291504, "step": 841 }, { "epoch": 2.220171390903098, "grad_norm": 10.232915878295898, "learning_rate": 2.0940550133096716e-06, "log_odds_chosen": 0.41215968132019043, "log_odds_ratio": -0.5210889577865601, "logits/chosen": -1.1280133724212646, "logits/rejected": -1.026360273361206, "logps/chosen": -1.3107011318206787, "logps/rejected": -1.639918565750122, "loss": 2.6401, "nll_loss": 0.6079277992248535, "rewards/accuracies": 1.0, "rewards/chosen": -0.1310701072216034, "rewards/margins": 0.03292175382375717, "rewards/rejected": -0.16399186849594116, "step": 842 }, { "epoch": 2.222808174027686, "grad_norm": 9.591880798339844, "learning_rate": 2.08695652173913e-06, "log_odds_chosen": 0.5880998373031616, "log_odds_ratio": -0.46353909373283386, "logits/chosen": -1.0672273635864258, "logits/rejected": -1.0226701498031616, "logps/chosen": -1.1952481269836426, "logps/rejected": -1.648200511932373, "loss": 1.7761, "nll_loss": 0.3976776599884033, "rewards/accuracies": 0.875, "rewards/chosen": -0.11952481418848038, "rewards/margins": 0.04529523849487305, "rewards/rejected": -0.16482004523277283, "step": 843 }, { "epoch": 2.2254449571522743, "grad_norm": 9.671662330627441, "learning_rate": 2.079858030168589e-06, "log_odds_chosen": 0.4567071199417114, "log_odds_ratio": -0.5039539337158203, "logits/chosen": -1.059120535850525, "logits/rejected": -0.9793242812156677, "logps/chosen": -1.3402698040008545, "logps/rejected": -1.696657419204712, "loss": 2.0356, "nll_loss": 0.45850008726119995, "rewards/accuracies": 1.0, "rewards/chosen": -0.13402698934078217, "rewards/margins": 0.035638757050037384, "rewards/rejected": -0.16966573894023895, "step": 844 }, { "epoch": 2.2280817402768625, "grad_norm": 10.071785926818848, "learning_rate": 2.072759538598048e-06, "log_odds_chosen": 0.6569117307662964, "log_odds_ratio": -0.44372034072875977, "logits/chosen": -1.1549427509307861, "logits/rejected": -1.0164406299591064, "logps/chosen": -1.2740365266799927, "logps/rejected": -1.7857685089111328, "loss": 2.3573, "nll_loss": 0.5449540019035339, "rewards/accuracies": 0.875, "rewards/chosen": -0.12740366160869598, "rewards/margins": 0.05117318406701088, "rewards/rejected": -0.17857685685157776, "step": 845 }, { "epoch": 2.23071852340145, "grad_norm": 10.06051254272461, "learning_rate": 2.0656610470275068e-06, "log_odds_chosen": 0.7105215191841125, "log_odds_ratio": -0.4064417779445648, "logits/chosen": -1.1775195598602295, "logits/rejected": -1.0729823112487793, "logps/chosen": -1.2887108325958252, "logps/rejected": -1.845299243927002, "loss": 2.3104, "nll_loss": 0.5369586944580078, "rewards/accuracies": 1.0, "rewards/chosen": -0.12887108325958252, "rewards/margins": 0.05565885454416275, "rewards/rejected": -0.18452993035316467, "step": 846 }, { "epoch": 2.2333553065260383, "grad_norm": 10.156782150268555, "learning_rate": 2.0585625554569653e-06, "log_odds_chosen": 0.862796425819397, "log_odds_ratio": -0.40923982858657837, "logits/chosen": -1.1173951625823975, "logits/rejected": -1.020320177078247, "logps/chosen": -1.159977674484253, "logps/rejected": -1.8526980876922607, "loss": 1.9856, "nll_loss": 0.4554870128631592, "rewards/accuracies": 0.875, "rewards/chosen": -0.11599776893854141, "rewards/margins": 0.06927204132080078, "rewards/rejected": -0.1852698028087616, "step": 847 }, { "epoch": 2.235992089650626, "grad_norm": 10.295635223388672, "learning_rate": 2.051464063886424e-06, "log_odds_chosen": 0.34328746795654297, "log_odds_ratio": -0.5436752438545227, "logits/chosen": -1.054999589920044, "logits/rejected": -0.9837340116500854, "logps/chosen": -1.2793586254119873, "logps/rejected": -1.533247709274292, "loss": 2.8444, "nll_loss": 0.6567400097846985, "rewards/accuracies": 0.75, "rewards/chosen": -0.12793587148189545, "rewards/margins": 0.025388918817043304, "rewards/rejected": -0.15332478284835815, "step": 848 }, { "epoch": 2.238628872775214, "grad_norm": 9.936890602111816, "learning_rate": 2.0443655723158826e-06, "log_odds_chosen": 0.7530043125152588, "log_odds_ratio": -0.4255771040916443, "logits/chosen": -1.07007896900177, "logits/rejected": -1.0139280557632446, "logps/chosen": -1.1299421787261963, "logps/rejected": -1.6235958337783813, "loss": 2.0257, "nll_loss": 0.46387165784835815, "rewards/accuracies": 0.875, "rewards/chosen": -0.11299421638250351, "rewards/margins": 0.049365364015102386, "rewards/rejected": -0.1623595803976059, "step": 849 }, { "epoch": 2.2412656558998023, "grad_norm": 10.255937576293945, "learning_rate": 2.0372670807453415e-06, "log_odds_chosen": 0.736466646194458, "log_odds_ratio": -0.4073718786239624, "logits/chosen": -1.0956451892852783, "logits/rejected": -0.9570380449295044, "logps/chosen": -1.362380027770996, "logps/rejected": -1.9511312246322632, "loss": 2.4922, "nll_loss": 0.5823126435279846, "rewards/accuracies": 1.0, "rewards/chosen": -0.1362380087375641, "rewards/margins": 0.05887509882450104, "rewards/rejected": -0.19511312246322632, "step": 850 }, { "epoch": 2.2439024390243905, "grad_norm": 9.78884506225586, "learning_rate": 2.0301685891748e-06, "log_odds_chosen": 0.59482741355896, "log_odds_ratio": -0.4567106366157532, "logits/chosen": -1.187472939491272, "logits/rejected": -1.0651209354400635, "logps/chosen": -1.3073811531066895, "logps/rejected": -1.7776367664337158, "loss": 2.4399, "nll_loss": 0.5643101930618286, "rewards/accuracies": 1.0, "rewards/chosen": -0.13073810935020447, "rewards/margins": 0.04702557250857353, "rewards/rejected": -0.1777637004852295, "step": 851 }, { "epoch": 2.246539222148978, "grad_norm": 9.819873809814453, "learning_rate": 2.023070097604259e-06, "log_odds_chosen": 0.7717651128768921, "log_odds_ratio": -0.3990074098110199, "logits/chosen": -1.2048760652542114, "logits/rejected": -1.0654480457305908, "logps/chosen": -1.35507333278656, "logps/rejected": -1.9843974113464355, "loss": 2.5905, "nll_loss": 0.607713520526886, "rewards/accuracies": 1.0, "rewards/chosen": -0.13550734519958496, "rewards/margins": 0.06293240189552307, "rewards/rejected": -0.19843974709510803, "step": 852 }, { "epoch": 2.2491760052735663, "grad_norm": 9.984160423278809, "learning_rate": 2.015971606033718e-06, "log_odds_chosen": 0.24353592097759247, "log_odds_ratio": -0.5929281711578369, "logits/chosen": -1.041812539100647, "logits/rejected": -0.9944383502006531, "logps/chosen": -1.3887944221496582, "logps/rejected": -1.5821037292480469, "loss": 2.0191, "nll_loss": 0.4454866647720337, "rewards/accuracies": 0.625, "rewards/chosen": -0.1388794332742691, "rewards/margins": 0.019330933690071106, "rewards/rejected": -0.1582103669643402, "step": 853 }, { "epoch": 2.251812788398154, "grad_norm": 10.243196487426758, "learning_rate": 2.0088731144631767e-06, "log_odds_chosen": 0.4460301399230957, "log_odds_ratio": -0.5126500129699707, "logits/chosen": -1.0765531063079834, "logits/rejected": -0.9831266403198242, "logps/chosen": -1.4298105239868164, "logps/rejected": -1.788804054260254, "loss": 2.8298, "nll_loss": 0.6561848521232605, "rewards/accuracies": 0.75, "rewards/chosen": -0.14298105239868164, "rewards/margins": 0.035899337381124496, "rewards/rejected": -0.17888039350509644, "step": 854 }, { "epoch": 2.254449571522742, "grad_norm": 10.224044799804688, "learning_rate": 2.001774622892635e-06, "log_odds_chosen": 0.5008156895637512, "log_odds_ratio": -0.4781492352485657, "logits/chosen": -1.1945780515670776, "logits/rejected": -1.0529351234436035, "logps/chosen": -1.283106803894043, "logps/rejected": -1.6531083583831787, "loss": 2.5087, "nll_loss": 0.5793533325195312, "rewards/accuracies": 1.0, "rewards/chosen": -0.1283106803894043, "rewards/margins": 0.0370001494884491, "rewards/rejected": -0.1653108447790146, "step": 855 }, { "epoch": 2.2570863546473303, "grad_norm": 10.119146347045898, "learning_rate": 1.994676131322094e-06, "log_odds_chosen": 0.3080759644508362, "log_odds_ratio": -0.5613141059875488, "logits/chosen": -1.1984078884124756, "logits/rejected": -1.0711963176727295, "logps/chosen": -1.4382545948028564, "logps/rejected": -1.6706795692443848, "loss": 2.6115, "nll_loss": 0.5967416763305664, "rewards/accuracies": 0.875, "rewards/chosen": -0.1438254565000534, "rewards/margins": 0.023242495954036713, "rewards/rejected": -0.16706794500350952, "step": 856 }, { "epoch": 2.2597231377719185, "grad_norm": 10.42589282989502, "learning_rate": 1.9875776397515526e-06, "log_odds_chosen": 0.5139608383178711, "log_odds_ratio": -0.4912663400173187, "logits/chosen": -1.0437419414520264, "logits/rejected": -0.9561313390731812, "logps/chosen": -1.3991198539733887, "logps/rejected": -1.7931902408599854, "loss": 2.0823, "nll_loss": 0.47144123911857605, "rewards/accuracies": 0.875, "rewards/chosen": -0.13991199433803558, "rewards/margins": 0.039407022297382355, "rewards/rejected": -0.17931902408599854, "step": 857 }, { "epoch": 2.262359920896506, "grad_norm": 10.34991455078125, "learning_rate": 1.9804791481810115e-06, "log_odds_chosen": 0.501839280128479, "log_odds_ratio": -0.488447368144989, "logits/chosen": -1.0929710865020752, "logits/rejected": -1.0430017709732056, "logps/chosen": -1.2344586849212646, "logps/rejected": -1.5981402397155762, "loss": 2.4483, "nll_loss": 0.5632182359695435, "rewards/accuracies": 0.875, "rewards/chosen": -0.12344586849212646, "rewards/margins": 0.036368150264024734, "rewards/rejected": -0.1598140299320221, "step": 858 }, { "epoch": 2.2649967040210943, "grad_norm": 9.237586975097656, "learning_rate": 1.9733806566104704e-06, "log_odds_chosen": 0.7990984916687012, "log_odds_ratio": -0.3936226963996887, "logits/chosen": -1.0956703424453735, "logits/rejected": -0.9938894510269165, "logps/chosen": -1.1750367879867554, "logps/rejected": -1.7683098316192627, "loss": 1.9704, "nll_loss": 0.45324358344078064, "rewards/accuracies": 1.0, "rewards/chosen": -0.11750368773937225, "rewards/margins": 0.05932728946208954, "rewards/rejected": -0.1768309772014618, "step": 859 }, { "epoch": 2.267633487145682, "grad_norm": 10.378680229187012, "learning_rate": 1.966282165039929e-06, "log_odds_chosen": 0.8377877473831177, "log_odds_ratio": -0.4150227904319763, "logits/chosen": -1.1576271057128906, "logits/rejected": -1.0481622219085693, "logps/chosen": -1.2925989627838135, "logps/rejected": -1.9561983346939087, "loss": 2.54, "nll_loss": 0.5934931039810181, "rewards/accuracies": 1.0, "rewards/chosen": -0.1292598843574524, "rewards/margins": 0.06635995954275131, "rewards/rejected": -0.1956198513507843, "step": 860 }, { "epoch": 2.27027027027027, "grad_norm": 10.77652359008789, "learning_rate": 1.9591836734693877e-06, "log_odds_chosen": 0.5458847880363464, "log_odds_ratio": -0.4712611138820648, "logits/chosen": -1.0664516687393188, "logits/rejected": -1.0485702753067017, "logps/chosen": -1.309433937072754, "logps/rejected": -1.7289760112762451, "loss": 2.7616, "nll_loss": 0.6432833075523376, "rewards/accuracies": 1.0, "rewards/chosen": -0.1309433877468109, "rewards/margins": 0.04195421189069748, "rewards/rejected": -0.172897607088089, "step": 861 }, { "epoch": 2.2729070533948583, "grad_norm": 9.428654670715332, "learning_rate": 1.952085181898846e-06, "log_odds_chosen": 0.5159881114959717, "log_odds_ratio": -0.5256285071372986, "logits/chosen": -1.1286839246749878, "logits/rejected": -1.0185039043426514, "logps/chosen": -1.3016605377197266, "logps/rejected": -1.7159929275512695, "loss": 2.1145, "nll_loss": 0.4760742783546448, "rewards/accuracies": 0.75, "rewards/chosen": -0.13016605377197266, "rewards/margins": 0.041433244943618774, "rewards/rejected": -0.17159929871559143, "step": 862 }, { "epoch": 2.2755438365194465, "grad_norm": 10.207335472106934, "learning_rate": 1.944986690328305e-06, "log_odds_chosen": 0.3654537796974182, "log_odds_ratio": -0.5408401489257812, "logits/chosen": -1.084734320640564, "logits/rejected": -0.9910352230072021, "logps/chosen": -1.3164527416229248, "logps/rejected": -1.6015342473983765, "loss": 2.1288, "nll_loss": 0.4781232476234436, "rewards/accuracies": 0.875, "rewards/chosen": -0.1316452920436859, "rewards/margins": 0.02850813791155815, "rewards/rejected": -0.16015341877937317, "step": 863 }, { "epoch": 2.278180619644034, "grad_norm": 10.110044479370117, "learning_rate": 1.937888198757764e-06, "log_odds_chosen": 0.6507178544998169, "log_odds_ratio": -0.4426138401031494, "logits/chosen": -1.0788090229034424, "logits/rejected": -1.0549869537353516, "logps/chosen": -1.224971890449524, "logps/rejected": -1.730021357536316, "loss": 2.0642, "nll_loss": 0.4717872142791748, "rewards/accuracies": 0.875, "rewards/chosen": -0.12249719351530075, "rewards/margins": 0.05050494521856308, "rewards/rejected": -0.17300213873386383, "step": 864 }, { "epoch": 2.2808174027686223, "grad_norm": 10.79237174987793, "learning_rate": 1.9307897071872225e-06, "log_odds_chosen": 0.42231541872024536, "log_odds_ratio": -0.5157482624053955, "logits/chosen": -1.1129214763641357, "logits/rejected": -1.0310511589050293, "logps/chosen": -1.4913139343261719, "logps/rejected": -1.8260332345962524, "loss": 2.5362, "nll_loss": 0.5824636220932007, "rewards/accuracies": 0.875, "rewards/chosen": -0.1491314172744751, "rewards/margins": 0.03347191959619522, "rewards/rejected": -0.18260332942008972, "step": 865 }, { "epoch": 2.2834541858932105, "grad_norm": 10.123908042907715, "learning_rate": 1.9236912156166814e-06, "log_odds_chosen": 0.6933606266975403, "log_odds_ratio": -0.48170340061187744, "logits/chosen": -1.1296451091766357, "logits/rejected": -1.0230624675750732, "logps/chosen": -1.3490564823150635, "logps/rejected": -1.9312152862548828, "loss": 2.2868, "nll_loss": 0.5235217213630676, "rewards/accuracies": 0.75, "rewards/chosen": -0.1349056512117386, "rewards/margins": 0.058215878903865814, "rewards/rejected": -0.1931215226650238, "step": 866 }, { "epoch": 2.286090969017798, "grad_norm": 9.657790184020996, "learning_rate": 1.9165927240461403e-06, "log_odds_chosen": 0.7490557432174683, "log_odds_ratio": -0.43284159898757935, "logits/chosen": -1.0705084800720215, "logits/rejected": -1.0278089046478271, "logps/chosen": -1.119507074356079, "logps/rejected": -1.552225112915039, "loss": 1.8637, "nll_loss": 0.4226509630680084, "rewards/accuracies": 0.875, "rewards/chosen": -0.11195070296525955, "rewards/margins": 0.04327181726694107, "rewards/rejected": -0.15522252023220062, "step": 867 }, { "epoch": 2.2887277521423863, "grad_norm": 9.32958698272705, "learning_rate": 1.9094942324755988e-06, "log_odds_chosen": 0.49227672815322876, "log_odds_ratio": -0.4879855215549469, "logits/chosen": -1.1634224653244019, "logits/rejected": -1.050025224685669, "logps/chosen": -1.2189109325408936, "logps/rejected": -1.592963695526123, "loss": 2.2461, "nll_loss": 0.5127338171005249, "rewards/accuracies": 0.875, "rewards/chosen": -0.1218910962343216, "rewards/margins": 0.03740527853369713, "rewards/rejected": -0.15929636359214783, "step": 868 }, { "epoch": 2.2913645352669745, "grad_norm": 9.668261528015137, "learning_rate": 1.9023957409050575e-06, "log_odds_chosen": 0.5143749117851257, "log_odds_ratio": -0.4800008535385132, "logits/chosen": -1.0946844816207886, "logits/rejected": -1.011817455291748, "logps/chosen": -1.240984320640564, "logps/rejected": -1.6015645265579224, "loss": 2.0224, "nll_loss": 0.45759451389312744, "rewards/accuracies": 1.0, "rewards/chosen": -0.12409843504428864, "rewards/margins": 0.03605801984667778, "rewards/rejected": -0.1601564586162567, "step": 869 }, { "epoch": 2.294001318391562, "grad_norm": 10.814599990844727, "learning_rate": 1.8952972493345164e-06, "log_odds_chosen": 0.4014270603656769, "log_odds_ratio": -0.5303521156311035, "logits/chosen": -1.0695867538452148, "logits/rejected": -0.933932363986969, "logps/chosen": -1.5640766620635986, "logps/rejected": -1.8934946060180664, "loss": 3.2221, "nll_loss": 0.7524948716163635, "rewards/accuracies": 0.875, "rewards/chosen": -0.1564076691865921, "rewards/margins": 0.032941803336143494, "rewards/rejected": -0.1893494725227356, "step": 870 }, { "epoch": 2.2966381015161503, "grad_norm": 10.645783424377441, "learning_rate": 1.888198757763975e-06, "log_odds_chosen": 0.5395257472991943, "log_odds_ratio": -0.4920005202293396, "logits/chosen": -1.184718132019043, "logits/rejected": -1.0663403272628784, "logps/chosen": -1.4322577714920044, "logps/rejected": -1.8459181785583496, "loss": 2.6386, "nll_loss": 0.6104428768157959, "rewards/accuracies": 0.875, "rewards/chosen": -0.1432257890701294, "rewards/margins": 0.04136602580547333, "rewards/rejected": -0.18459181487560272, "step": 871 }, { "epoch": 2.2992748846407385, "grad_norm": 10.148241996765137, "learning_rate": 1.881100266193434e-06, "log_odds_chosen": 0.4539548456668854, "log_odds_ratio": -0.506780743598938, "logits/chosen": -1.1411820650100708, "logits/rejected": -1.0451831817626953, "logps/chosen": -1.4123101234436035, "logps/rejected": -1.7676352262496948, "loss": 2.3386, "nll_loss": 0.5339686870574951, "rewards/accuracies": 0.875, "rewards/chosen": -0.1412310153245926, "rewards/margins": 0.03553250432014465, "rewards/rejected": -0.17676350474357605, "step": 872 }, { "epoch": 2.301911667765326, "grad_norm": 10.121185302734375, "learning_rate": 1.8740017746228924e-06, "log_odds_chosen": 0.5971920490264893, "log_odds_ratio": -0.4546307325363159, "logits/chosen": -1.1044751405715942, "logits/rejected": -1.0192352533340454, "logps/chosen": -1.2726118564605713, "logps/rejected": -1.7381761074066162, "loss": 2.374, "nll_loss": 0.548031210899353, "rewards/accuracies": 1.0, "rewards/chosen": -0.1272611767053604, "rewards/margins": 0.046556420624256134, "rewards/rejected": -0.17381760478019714, "step": 873 }, { "epoch": 2.3045484508899143, "grad_norm": 10.169872283935547, "learning_rate": 1.8669032830523513e-06, "log_odds_chosen": 0.30267369747161865, "log_odds_ratio": -0.5743823051452637, "logits/chosen": -1.1027439832687378, "logits/rejected": -1.0398067235946655, "logps/chosen": -1.3280037641525269, "logps/rejected": -1.5539779663085938, "loss": 2.1782, "nll_loss": 0.487112432718277, "rewards/accuracies": 0.625, "rewards/chosen": -0.1328003704547882, "rewards/margins": 0.022597430273890495, "rewards/rejected": -0.15539780259132385, "step": 874 }, { "epoch": 2.3071852340145025, "grad_norm": 9.782196044921875, "learning_rate": 1.85980479148181e-06, "log_odds_chosen": 0.9248654842376709, "log_odds_ratio": -0.34867754578590393, "logits/chosen": -1.1500089168548584, "logits/rejected": -0.9892420768737793, "logps/chosen": -1.2782080173492432, "logps/rejected": -2.0070817470550537, "loss": 2.2703, "nll_loss": 0.5327146053314209, "rewards/accuracies": 1.0, "rewards/chosen": -0.12782080471515656, "rewards/margins": 0.0728873685002327, "rewards/rejected": -0.20070818066596985, "step": 875 }, { "epoch": 2.30982201713909, "grad_norm": 10.457931518554688, "learning_rate": 1.852706299911269e-06, "log_odds_chosen": 0.48485198616981506, "log_odds_ratio": -0.4915451109409332, "logits/chosen": -1.1804633140563965, "logits/rejected": -1.0634907484054565, "logps/chosen": -1.4077634811401367, "logps/rejected": -1.787198781967163, "loss": 2.454, "nll_loss": 0.5643399357795715, "rewards/accuracies": 0.875, "rewards/chosen": -0.1407763659954071, "rewards/margins": 0.037943534553050995, "rewards/rejected": -0.1787198781967163, "step": 876 }, { "epoch": 2.3124588002636783, "grad_norm": 9.764825820922852, "learning_rate": 1.8456078083407276e-06, "log_odds_chosen": 0.6608742475509644, "log_odds_ratio": -0.4454282522201538, "logits/chosen": -1.0784417390823364, "logits/rejected": -1.0097901821136475, "logps/chosen": -1.1584433317184448, "logps/rejected": -1.6725659370422363, "loss": 2.0073, "nll_loss": 0.45727962255477905, "rewards/accuracies": 1.0, "rewards/chosen": -0.11584433913230896, "rewards/margins": 0.05141226202249527, "rewards/rejected": -0.16725659370422363, "step": 877 }, { "epoch": 2.3150955833882665, "grad_norm": 10.151506423950195, "learning_rate": 1.838509316770186e-06, "log_odds_chosen": 0.6765606999397278, "log_odds_ratio": -0.4168528616428375, "logits/chosen": -1.1900181770324707, "logits/rejected": -1.0964360237121582, "logps/chosen": -1.3045673370361328, "logps/rejected": -1.8341665267944336, "loss": 2.5437, "nll_loss": 0.5942333340644836, "rewards/accuracies": 1.0, "rewards/chosen": -0.13045673072338104, "rewards/margins": 0.05295991897583008, "rewards/rejected": -0.18341666460037231, "step": 878 }, { "epoch": 2.317732366512854, "grad_norm": 10.592248916625977, "learning_rate": 1.831410825199645e-06, "log_odds_chosen": 0.5475701689720154, "log_odds_ratio": -0.4736286997795105, "logits/chosen": -1.0685279369354248, "logits/rejected": -1.0304744243621826, "logps/chosen": -1.4587361812591553, "logps/rejected": -1.8997396230697632, "loss": 2.1521, "nll_loss": 0.490667462348938, "rewards/accuracies": 0.875, "rewards/chosen": -0.14587360620498657, "rewards/margins": 0.04410037025809288, "rewards/rejected": -0.18997396528720856, "step": 879 }, { "epoch": 2.3203691496374423, "grad_norm": 9.69548511505127, "learning_rate": 1.8243123336291037e-06, "log_odds_chosen": 0.5831120014190674, "log_odds_ratio": -0.45221713185310364, "logits/chosen": -1.1400697231292725, "logits/rejected": -1.0067840814590454, "logps/chosen": -1.306548833847046, "logps/rejected": -1.7670178413391113, "loss": 2.2661, "nll_loss": 0.5212967395782471, "rewards/accuracies": 1.0, "rewards/chosen": -0.13065488636493683, "rewards/margins": 0.0460469052195549, "rewards/rejected": -0.17670178413391113, "step": 880 }, { "epoch": 2.3230059327620305, "grad_norm": 9.943262100219727, "learning_rate": 1.8172138420585626e-06, "log_odds_chosen": 0.46555984020233154, "log_odds_ratio": -0.49573034048080444, "logits/chosen": -1.1484509706497192, "logits/rejected": -1.0376181602478027, "logps/chosen": -1.2022852897644043, "logps/rejected": -1.5464811325073242, "loss": 2.1417, "nll_loss": 0.48586392402648926, "rewards/accuracies": 1.0, "rewards/chosen": -0.12022852897644043, "rewards/margins": 0.03441959619522095, "rewards/rejected": -0.15464811027050018, "step": 881 }, { "epoch": 2.325642715886618, "grad_norm": 10.272644996643066, "learning_rate": 1.810115350488021e-06, "log_odds_chosen": 0.6326010823249817, "log_odds_ratio": -0.44564175605773926, "logits/chosen": -1.1390728950500488, "logits/rejected": -1.0238792896270752, "logps/chosen": -1.4400683641433716, "logps/rejected": -1.9388606548309326, "loss": 2.6395, "nll_loss": 0.6153170466423035, "rewards/accuracies": 1.0, "rewards/chosen": -0.1440068483352661, "rewards/margins": 0.04987923428416252, "rewards/rejected": -0.19388607144355774, "step": 882 }, { "epoch": 2.3282794990112063, "grad_norm": 10.139723777770996, "learning_rate": 1.80301685891748e-06, "log_odds_chosen": 0.6296920776367188, "log_odds_ratio": -0.453926682472229, "logits/chosen": -1.1161080598831177, "logits/rejected": -1.0118201971054077, "logps/chosen": -1.3150339126586914, "logps/rejected": -1.821239709854126, "loss": 2.1842, "nll_loss": 0.5006626844406128, "rewards/accuracies": 1.0, "rewards/chosen": -0.1315034031867981, "rewards/margins": 0.050620581954717636, "rewards/rejected": -0.18212398886680603, "step": 883 }, { "epoch": 2.3309162821357945, "grad_norm": 9.526472091674805, "learning_rate": 1.7959183673469386e-06, "log_odds_chosen": 0.2386457324028015, "log_odds_ratio": -0.595291256904602, "logits/chosen": -1.0480625629425049, "logits/rejected": -1.0287106037139893, "logps/chosen": -1.3027513027191162, "logps/rejected": -1.4972296953201294, "loss": 1.9034, "nll_loss": 0.4163215458393097, "rewards/accuracies": 0.875, "rewards/chosen": -0.13027513027191162, "rewards/margins": 0.019447840750217438, "rewards/rejected": -0.14972296357154846, "step": 884 }, { "epoch": 2.333553065260382, "grad_norm": 10.50646686553955, "learning_rate": 1.7888198757763975e-06, "log_odds_chosen": 0.2895394563674927, "log_odds_ratio": -0.5652808547019958, "logits/chosen": -1.111433982849121, "logits/rejected": -1.0047237873077393, "logps/chosen": -1.5478146076202393, "logps/rejected": -1.776818037033081, "loss": 2.7286, "nll_loss": 0.6256152391433716, "rewards/accuracies": 0.875, "rewards/chosen": -0.15478146076202393, "rewards/margins": 0.022900333628058434, "rewards/rejected": -0.1776818037033081, "step": 885 }, { "epoch": 2.3361898483849703, "grad_norm": 9.655818939208984, "learning_rate": 1.7817213842058562e-06, "log_odds_chosen": 0.3944101333618164, "log_odds_ratio": -0.5261645317077637, "logits/chosen": -1.1105564832687378, "logits/rejected": -1.048958420753479, "logps/chosen": -1.366010069847107, "logps/rejected": -1.6764073371887207, "loss": 2.5216, "nll_loss": 0.5777935981750488, "rewards/accuracies": 0.875, "rewards/chosen": -0.1366010159254074, "rewards/margins": 0.03103972040116787, "rewards/rejected": -0.16764073073863983, "step": 886 }, { "epoch": 2.3388266315095585, "grad_norm": 9.845623970031738, "learning_rate": 1.7746228926353149e-06, "log_odds_chosen": 0.5777924060821533, "log_odds_ratio": -0.46263208985328674, "logits/chosen": -1.0652246475219727, "logits/rejected": -0.9925827980041504, "logps/chosen": -1.2034990787506104, "logps/rejected": -1.636778473854065, "loss": 2.2739, "nll_loss": 0.5222036838531494, "rewards/accuracies": 1.0, "rewards/chosen": -0.12034991383552551, "rewards/margins": 0.0433279350399971, "rewards/rejected": -0.16367784142494202, "step": 887 }, { "epoch": 2.341463414634146, "grad_norm": 9.432107925415039, "learning_rate": 1.7675244010647736e-06, "log_odds_chosen": 0.5671384334564209, "log_odds_ratio": -0.4698998034000397, "logits/chosen": -1.0846713781356812, "logits/rejected": -0.9662580490112305, "logps/chosen": -1.3929511308670044, "logps/rejected": -1.8406968116760254, "loss": 2.1215, "nll_loss": 0.4833735525608063, "rewards/accuracies": 1.0, "rewards/chosen": -0.13929511606693268, "rewards/margins": 0.04477456212043762, "rewards/rejected": -0.1840696781873703, "step": 888 }, { "epoch": 2.3441001977587343, "grad_norm": 9.808156967163086, "learning_rate": 1.7604259094942325e-06, "log_odds_chosen": 0.5133799314498901, "log_odds_ratio": -0.4915502965450287, "logits/chosen": -1.0829869508743286, "logits/rejected": -1.058847427368164, "logps/chosen": -1.174734354019165, "logps/rejected": -1.5203860998153687, "loss": 1.9699, "nll_loss": 0.4433155357837677, "rewards/accuracies": 1.0, "rewards/chosen": -0.11747344583272934, "rewards/margins": 0.03456517308950424, "rewards/rejected": -0.1520386040210724, "step": 889 }, { "epoch": 2.3467369808833225, "grad_norm": 9.102179527282715, "learning_rate": 1.7533274179236912e-06, "log_odds_chosen": 0.6991630792617798, "log_odds_ratio": -0.4420987665653229, "logits/chosen": -0.9580685496330261, "logits/rejected": -0.9406499862670898, "logps/chosen": -1.1506810188293457, "logps/rejected": -1.6416935920715332, "loss": 1.7795, "nll_loss": 0.4006742238998413, "rewards/accuracies": 0.875, "rewards/chosen": -0.11506810039281845, "rewards/margins": 0.04910125583410263, "rewards/rejected": -0.16416935622692108, "step": 890 }, { "epoch": 2.34937376400791, "grad_norm": 10.316890716552734, "learning_rate": 1.7462289263531499e-06, "log_odds_chosen": 0.6008606553077698, "log_odds_ratio": -0.45902445912361145, "logits/chosen": -1.0788568258285522, "logits/rejected": -1.0219684839248657, "logps/chosen": -1.3049077987670898, "logps/rejected": -1.7716357707977295, "loss": 2.3578, "nll_loss": 0.543548047542572, "rewards/accuracies": 1.0, "rewards/chosen": -0.13049077987670898, "rewards/margins": 0.04667280614376068, "rewards/rejected": -0.17716357111930847, "step": 891 }, { "epoch": 2.3520105471324984, "grad_norm": 9.912158966064453, "learning_rate": 1.7391304347826085e-06, "log_odds_chosen": 0.5222499370574951, "log_odds_ratio": -0.4807077646255493, "logits/chosen": -1.194288730621338, "logits/rejected": -1.010504961013794, "logps/chosen": -1.3184592723846436, "logps/rejected": -1.737593650817871, "loss": 2.651, "nll_loss": 0.6146837472915649, "rewards/accuracies": 1.0, "rewards/chosen": -0.13184592127799988, "rewards/margins": 0.04191344231367111, "rewards/rejected": -0.17375938594341278, "step": 892 }, { "epoch": 2.3546473302570865, "grad_norm": 11.46944808959961, "learning_rate": 1.7320319432120674e-06, "log_odds_chosen": 0.4674687385559082, "log_odds_ratio": -0.5207874178886414, "logits/chosen": -1.0839996337890625, "logits/rejected": -1.0142912864685059, "logps/chosen": -1.7729079723358154, "logps/rejected": -2.1121439933776855, "loss": 3.5885, "nll_loss": 0.8450548648834229, "rewards/accuracies": 0.625, "rewards/chosen": -0.17729079723358154, "rewards/margins": 0.03392360359430313, "rewards/rejected": -0.21121439337730408, "step": 893 }, { "epoch": 2.357284113381674, "grad_norm": 9.610631942749023, "learning_rate": 1.7249334516415261e-06, "log_odds_chosen": 0.5872002243995667, "log_odds_ratio": -0.4672238826751709, "logits/chosen": -1.069164752960205, "logits/rejected": -1.022336721420288, "logps/chosen": -1.069106101989746, "logps/rejected": -1.5121127367019653, "loss": 1.986, "nll_loss": 0.44977521896362305, "rewards/accuracies": 0.875, "rewards/chosen": -0.10691061615943909, "rewards/margins": 0.044300660490989685, "rewards/rejected": -0.15121127665042877, "step": 894 }, { "epoch": 2.3599208965062624, "grad_norm": 10.619580268859863, "learning_rate": 1.717834960070985e-06, "log_odds_chosen": 0.432054340839386, "log_odds_ratio": -0.5096228122711182, "logits/chosen": -1.0840938091278076, "logits/rejected": -1.0283002853393555, "logps/chosen": -1.402572512626648, "logps/rejected": -1.734330177307129, "loss": 2.6856, "nll_loss": 0.62043297290802, "rewards/accuracies": 1.0, "rewards/chosen": -0.14025725424289703, "rewards/margins": 0.0331757590174675, "rewards/rejected": -0.17343303561210632, "step": 895 }, { "epoch": 2.3625576796308505, "grad_norm": 10.408077239990234, "learning_rate": 1.7107364685004435e-06, "log_odds_chosen": 0.7202540636062622, "log_odds_ratio": -0.4119204878807068, "logits/chosen": -1.1463948488235474, "logits/rejected": -1.0047610998153687, "logps/chosen": -1.3519786596298218, "logps/rejected": -1.93733811378479, "loss": 2.2873, "nll_loss": 0.530625581741333, "rewards/accuracies": 1.0, "rewards/chosen": -0.13519787788391113, "rewards/margins": 0.05853593349456787, "rewards/rejected": -0.193733811378479, "step": 896 }, { "epoch": 2.365194462755438, "grad_norm": 9.422696113586426, "learning_rate": 1.7036379769299022e-06, "log_odds_chosen": 0.9097142219543457, "log_odds_ratio": -0.3858075737953186, "logits/chosen": -1.0098564624786377, "logits/rejected": -0.9362455010414124, "logps/chosen": -1.2319567203521729, "logps/rejected": -1.968179702758789, "loss": 2.0434, "nll_loss": 0.47226881980895996, "rewards/accuracies": 0.875, "rewards/chosen": -0.12319567054510117, "rewards/margins": 0.07362228631973267, "rewards/rejected": -0.19681797921657562, "step": 897 }, { "epoch": 2.3678312458800264, "grad_norm": 10.891953468322754, "learning_rate": 1.696539485359361e-06, "log_odds_chosen": 0.637458086013794, "log_odds_ratio": -0.4614834189414978, "logits/chosen": -1.1466891765594482, "logits/rejected": -1.0769871473312378, "logps/chosen": -1.287334680557251, "logps/rejected": -1.781071424484253, "loss": 2.4553, "nll_loss": 0.5676878094673157, "rewards/accuracies": 1.0, "rewards/chosen": -0.12873347103595734, "rewards/margins": 0.04937367141246796, "rewards/rejected": -0.1781071424484253, "step": 898 }, { "epoch": 2.3704680290046145, "grad_norm": 10.109067916870117, "learning_rate": 1.6894409937888198e-06, "log_odds_chosen": 0.29623594880104065, "log_odds_ratio": -0.5727648735046387, "logits/chosen": -1.159841775894165, "logits/rejected": -1.1025397777557373, "logps/chosen": -1.2897531986236572, "logps/rejected": -1.522739291191101, "loss": 2.1762, "nll_loss": 0.486769437789917, "rewards/accuracies": 0.75, "rewards/chosen": -0.12897531688213348, "rewards/margins": 0.02329862117767334, "rewards/rejected": -0.15227393805980682, "step": 899 }, { "epoch": 2.373104812129202, "grad_norm": 10.160222053527832, "learning_rate": 1.6823425022182785e-06, "log_odds_chosen": 0.7126874923706055, "log_odds_ratio": -0.4103580713272095, "logits/chosen": -1.09098219871521, "logits/rejected": -1.0015114545822144, "logps/chosen": -1.2328615188598633, "logps/rejected": -1.7861967086791992, "loss": 1.919, "nll_loss": 0.4387224018573761, "rewards/accuracies": 1.0, "rewards/chosen": -0.1232861578464508, "rewards/margins": 0.05533352866768837, "rewards/rejected": -0.17861968278884888, "step": 900 }, { "epoch": 2.3757415952537904, "grad_norm": 10.40146255493164, "learning_rate": 1.6752440106477372e-06, "log_odds_chosen": 0.5050216317176819, "log_odds_ratio": -0.4958907961845398, "logits/chosen": -1.119974136352539, "logits/rejected": -1.0149524211883545, "logps/chosen": -1.3021442890167236, "logps/rejected": -1.7089707851409912, "loss": 2.3676, "nll_loss": 0.5423151254653931, "rewards/accuracies": 0.75, "rewards/chosen": -0.13021443784236908, "rewards/margins": 0.04068264365196228, "rewards/rejected": -0.17089708149433136, "step": 901 }, { "epoch": 2.3783783783783785, "grad_norm": 9.469189643859863, "learning_rate": 1.668145519077196e-06, "log_odds_chosen": 0.766099750995636, "log_odds_ratio": -0.4818335771560669, "logits/chosen": -1.0585970878601074, "logits/rejected": -1.0308507680892944, "logps/chosen": -1.2359730005264282, "logps/rejected": -1.876680612564087, "loss": 1.7823, "nll_loss": 0.39740118384361267, "rewards/accuracies": 0.875, "rewards/chosen": -0.12359730154275894, "rewards/margins": 0.06407077610492706, "rewards/rejected": -0.1876680850982666, "step": 902 }, { "epoch": 2.381015161502966, "grad_norm": 10.153940200805664, "learning_rate": 1.6610470275066547e-06, "log_odds_chosen": 0.48691484332084656, "log_odds_ratio": -0.497551828622818, "logits/chosen": -1.1616532802581787, "logits/rejected": -1.0509796142578125, "logps/chosen": -1.2466967105865479, "logps/rejected": -1.6137733459472656, "loss": 2.1945, "nll_loss": 0.49886205792427063, "rewards/accuracies": 1.0, "rewards/chosen": -0.12466967850923538, "rewards/margins": 0.03670765459537506, "rewards/rejected": -0.16137734055519104, "step": 903 }, { "epoch": 2.3836519446275544, "grad_norm": 10.649412155151367, "learning_rate": 1.6539485359361136e-06, "log_odds_chosen": 0.5018337965011597, "log_odds_ratio": -0.4802248477935791, "logits/chosen": -1.1726360321044922, "logits/rejected": -1.0210614204406738, "logps/chosen": -1.3647270202636719, "logps/rejected": -1.753767728805542, "loss": 2.3417, "nll_loss": 0.5373994708061218, "rewards/accuracies": 0.875, "rewards/chosen": -0.1364727020263672, "rewards/margins": 0.03890407457947731, "rewards/rejected": -0.1753767728805542, "step": 904 }, { "epoch": 2.3862887277521425, "grad_norm": 9.40090274810791, "learning_rate": 1.6468500443655721e-06, "log_odds_chosen": 0.472876638174057, "log_odds_ratio": -0.4991152286529541, "logits/chosen": -1.1030107736587524, "logits/rejected": -0.9815706014633179, "logps/chosen": -1.3572287559509277, "logps/rejected": -1.733076810836792, "loss": 2.1231, "nll_loss": 0.480863094329834, "rewards/accuracies": 0.75, "rewards/chosen": -0.13572287559509277, "rewards/margins": 0.0375848188996315, "rewards/rejected": -0.17330768704414368, "step": 905 }, { "epoch": 2.38892551087673, "grad_norm": 9.037947654724121, "learning_rate": 1.639751552795031e-06, "log_odds_chosen": 0.7296708822250366, "log_odds_ratio": -0.4820438027381897, "logits/chosen": -1.0791407823562622, "logits/rejected": -1.0010749101638794, "logps/chosen": -1.302752137184143, "logps/rejected": -1.9282175302505493, "loss": 1.9242, "nll_loss": 0.43284353613853455, "rewards/accuracies": 1.0, "rewards/chosen": -0.13027521967887878, "rewards/margins": 0.06254653632640839, "rewards/rejected": -0.19282175600528717, "step": 906 }, { "epoch": 2.3915622940013184, "grad_norm": 10.885642051696777, "learning_rate": 1.6326530612244897e-06, "log_odds_chosen": 0.5610907673835754, "log_odds_ratio": -0.4697563946247101, "logits/chosen": -1.0593876838684082, "logits/rejected": -0.9969384670257568, "logps/chosen": -1.23292875289917, "logps/rejected": -1.662524700164795, "loss": 2.425, "nll_loss": 0.5592802166938782, "rewards/accuracies": 1.0, "rewards/chosen": -0.12329287827014923, "rewards/margins": 0.04295959323644638, "rewards/rejected": -0.16625246405601501, "step": 907 }, { "epoch": 2.3941990771259065, "grad_norm": 9.862154960632324, "learning_rate": 1.6255545696539486e-06, "log_odds_chosen": 0.6774505376815796, "log_odds_ratio": -0.4316444396972656, "logits/chosen": -1.110433578491211, "logits/rejected": -1.0029642581939697, "logps/chosen": -1.3049352169036865, "logps/rejected": -1.8275152444839478, "loss": 1.94, "nll_loss": 0.4418395161628723, "rewards/accuracies": 1.0, "rewards/chosen": -0.13049352169036865, "rewards/margins": 0.05225801095366478, "rewards/rejected": -0.18275153636932373, "step": 908 }, { "epoch": 2.396835860250494, "grad_norm": 10.400384902954102, "learning_rate": 1.618456078083407e-06, "log_odds_chosen": 0.6197330951690674, "log_odds_ratio": -0.43622320890426636, "logits/chosen": -1.1227972507476807, "logits/rejected": -1.0015300512313843, "logps/chosen": -1.4339704513549805, "logps/rejected": -1.9296571016311646, "loss": 2.5103, "nll_loss": 0.5839563608169556, "rewards/accuracies": 1.0, "rewards/chosen": -0.14339704811573029, "rewards/margins": 0.049568649381399155, "rewards/rejected": -0.19296568632125854, "step": 909 }, { "epoch": 2.3994726433750824, "grad_norm": 9.41834831237793, "learning_rate": 1.611357586512866e-06, "log_odds_chosen": 0.865503191947937, "log_odds_ratio": -0.3612247407436371, "logits/chosen": -1.0651895999908447, "logits/rejected": -0.9995630979537964, "logps/chosen": -1.1853220462799072, "logps/rejected": -1.8519682884216309, "loss": 1.755, "nll_loss": 0.4026201367378235, "rewards/accuracies": 1.0, "rewards/chosen": -0.118532195687294, "rewards/margins": 0.06666462123394012, "rewards/rejected": -0.18519681692123413, "step": 910 }, { "epoch": 2.4021094264996705, "grad_norm": 10.072117805480957, "learning_rate": 1.6042590949423247e-06, "log_odds_chosen": 0.4887821674346924, "log_odds_ratio": -0.4884355664253235, "logits/chosen": -1.1391323804855347, "logits/rejected": -1.0412737131118774, "logps/chosen": -1.2391932010650635, "logps/rejected": -1.613529086112976, "loss": 2.2534, "nll_loss": 0.5145063996315002, "rewards/accuracies": 1.0, "rewards/chosen": -0.12391932308673859, "rewards/margins": 0.03743358328938484, "rewards/rejected": -0.16135290265083313, "step": 911 }, { "epoch": 2.4047462096242582, "grad_norm": 9.537360191345215, "learning_rate": 1.5971606033717836e-06, "log_odds_chosen": 0.699766993522644, "log_odds_ratio": -0.4318753480911255, "logits/chosen": -1.1274890899658203, "logits/rejected": -0.9987161159515381, "logps/chosen": -1.1979037523269653, "logps/rejected": -1.722472906112671, "loss": 1.9952, "nll_loss": 0.45561331510543823, "rewards/accuracies": 1.0, "rewards/chosen": -0.11979037523269653, "rewards/margins": 0.05245692655444145, "rewards/rejected": -0.1722472906112671, "step": 912 }, { "epoch": 2.4073829927488464, "grad_norm": 9.310301780700684, "learning_rate": 1.590062111801242e-06, "log_odds_chosen": 0.675567090511322, "log_odds_ratio": -0.4486117660999298, "logits/chosen": -1.030418038368225, "logits/rejected": -1.0064417123794556, "logps/chosen": -1.0173966884613037, "logps/rejected": -1.495659351348877, "loss": 1.8953, "nll_loss": 0.4289514422416687, "rewards/accuracies": 1.0, "rewards/chosen": -0.10173968225717545, "rewards/margins": 0.04782627522945404, "rewards/rejected": -0.1495659351348877, "step": 913 }, { "epoch": 2.4100197758734345, "grad_norm": 9.618062973022461, "learning_rate": 1.582963620230701e-06, "log_odds_chosen": 0.5738446116447449, "log_odds_ratio": -0.4660855829715729, "logits/chosen": -1.1193753480911255, "logits/rejected": -1.0765951871871948, "logps/chosen": -1.2046260833740234, "logps/rejected": -1.6074562072753906, "loss": 2.2378, "nll_loss": 0.5128461122512817, "rewards/accuracies": 1.0, "rewards/chosen": -0.12046261876821518, "rewards/margins": 0.04028301313519478, "rewards/rejected": -0.16074562072753906, "step": 914 }, { "epoch": 2.4126565589980222, "grad_norm": 11.003042221069336, "learning_rate": 1.5758651286601596e-06, "log_odds_chosen": 0.3635341227054596, "log_odds_ratio": -0.5497903823852539, "logits/chosen": -1.0701490640640259, "logits/rejected": -0.9974640607833862, "logps/chosen": -1.3847324848175049, "logps/rejected": -1.6702008247375488, "loss": 2.0758, "nll_loss": 0.46398162841796875, "rewards/accuracies": 0.875, "rewards/chosen": -0.1384732574224472, "rewards/margins": 0.02854682132601738, "rewards/rejected": -0.16702008247375488, "step": 915 }, { "epoch": 2.4152933421226104, "grad_norm": 10.645037651062012, "learning_rate": 1.5687666370896183e-06, "log_odds_chosen": 0.6083018183708191, "log_odds_ratio": -0.4520820677280426, "logits/chosen": -1.2035808563232422, "logits/rejected": -1.0772027969360352, "logps/chosen": -1.566821813583374, "logps/rejected": -2.0758349895477295, "loss": 2.8514, "nll_loss": 0.6676511168479919, "rewards/accuracies": 1.0, "rewards/chosen": -0.15668217837810516, "rewards/margins": 0.05090133473277092, "rewards/rejected": -0.20758351683616638, "step": 916 }, { "epoch": 2.4179301252471985, "grad_norm": 10.445541381835938, "learning_rate": 1.5616681455190772e-06, "log_odds_chosen": 0.2778787314891815, "log_odds_ratio": -0.5730139017105103, "logits/chosen": -1.122854232788086, "logits/rejected": -1.0918922424316406, "logps/chosen": -1.3809865713119507, "logps/rejected": -1.6034111976623535, "loss": 2.2505, "nll_loss": 0.5053151249885559, "rewards/accuracies": 1.0, "rewards/chosen": -0.13809865713119507, "rewards/margins": 0.022242471575737, "rewards/rejected": -0.16034114360809326, "step": 917 }, { "epoch": 2.4205669083717862, "grad_norm": 10.097089767456055, "learning_rate": 1.5545696539485357e-06, "log_odds_chosen": 0.3447312116622925, "log_odds_ratio": -0.5569623112678528, "logits/chosen": -1.0964230298995972, "logits/rejected": -1.018293857574463, "logps/chosen": -1.2689425945281982, "logps/rejected": -1.5412919521331787, "loss": 2.3839, "nll_loss": 0.5402828454971313, "rewards/accuracies": 0.625, "rewards/chosen": -0.1268942505121231, "rewards/margins": 0.027234943583607674, "rewards/rejected": -0.15412919223308563, "step": 918 }, { "epoch": 2.4232036914963744, "grad_norm": 10.062116622924805, "learning_rate": 1.5474711623779946e-06, "log_odds_chosen": 0.40707215666770935, "log_odds_ratio": -0.5170718431472778, "logits/chosen": -1.1261249780654907, "logits/rejected": -1.0706229209899902, "logps/chosen": -1.3197919130325317, "logps/rejected": -1.6365785598754883, "loss": 2.1433, "nll_loss": 0.4841257929801941, "rewards/accuracies": 1.0, "rewards/chosen": -0.13197919726371765, "rewards/margins": 0.03167865425348282, "rewards/rejected": -0.16365784406661987, "step": 919 }, { "epoch": 2.4258404746209625, "grad_norm": 9.968379020690918, "learning_rate": 1.5403726708074533e-06, "log_odds_chosen": 0.9766196012496948, "log_odds_ratio": -0.38572365045547485, "logits/chosen": -1.0680983066558838, "logits/rejected": -0.9757612347602844, "logps/chosen": -1.1962252855300903, "logps/rejected": -1.8891607522964478, "loss": 2.0798, "nll_loss": 0.48137712478637695, "rewards/accuracies": 0.875, "rewards/chosen": -0.11962252855300903, "rewards/margins": 0.0692935436964035, "rewards/rejected": -0.18891608715057373, "step": 920 }, { "epoch": 2.4284772577455502, "grad_norm": 10.270686149597168, "learning_rate": 1.5332741792369122e-06, "log_odds_chosen": 0.7579600214958191, "log_odds_ratio": -0.40435367822647095, "logits/chosen": -1.077327013015747, "logits/rejected": -0.954658567905426, "logps/chosen": -1.3990119695663452, "logps/rejected": -2.0181727409362793, "loss": 2.1346, "nll_loss": 0.4932202696800232, "rewards/accuracies": 0.875, "rewards/chosen": -0.13990119099617004, "rewards/margins": 0.06191607564687729, "rewards/rejected": -0.20181725919246674, "step": 921 }, { "epoch": 2.4311140408701384, "grad_norm": 10.072905540466309, "learning_rate": 1.5261756876663707e-06, "log_odds_chosen": 0.539434015750885, "log_odds_ratio": -0.48120465874671936, "logits/chosen": -1.085160255432129, "logits/rejected": -0.9633433222770691, "logps/chosen": -1.4367750883102417, "logps/rejected": -1.837584137916565, "loss": 2.2237, "nll_loss": 0.5078064799308777, "rewards/accuracies": 0.875, "rewards/chosen": -0.1436775028705597, "rewards/margins": 0.040080904960632324, "rewards/rejected": -0.1837584227323532, "step": 922 }, { "epoch": 2.4337508239947265, "grad_norm": 10.620375633239746, "learning_rate": 1.5190771960958296e-06, "log_odds_chosen": 0.19084365665912628, "log_odds_ratio": -0.6263587474822998, "logits/chosen": -1.134689211845398, "logits/rejected": -1.024929165840149, "logps/chosen": -1.4626632928848267, "logps/rejected": -1.6243414878845215, "loss": 2.854, "nll_loss": 0.6508681774139404, "rewards/accuracies": 0.75, "rewards/chosen": -0.14626634120941162, "rewards/margins": 0.016167812049388885, "rewards/rejected": -0.1624341458082199, "step": 923 }, { "epoch": 2.4363876071193147, "grad_norm": 9.383076667785645, "learning_rate": 1.5119787045252883e-06, "log_odds_chosen": 0.7854894399642944, "log_odds_ratio": -0.42655807733535767, "logits/chosen": -1.0859063863754272, "logits/rejected": -1.0121045112609863, "logps/chosen": -1.3842235803604126, "logps/rejected": -2.0174098014831543, "loss": 2.3423, "nll_loss": 0.5429260730743408, "rewards/accuracies": 1.0, "rewards/chosen": -0.13842236995697021, "rewards/margins": 0.06331861764192581, "rewards/rejected": -0.20174098014831543, "step": 924 }, { "epoch": 2.4390243902439024, "grad_norm": 9.584192276000977, "learning_rate": 1.5048802129547472e-06, "log_odds_chosen": 0.8783799409866333, "log_odds_ratio": -0.37397417426109314, "logits/chosen": -1.03621506690979, "logits/rejected": -0.9416929483413696, "logps/chosen": -1.3316413164138794, "logps/rejected": -2.048794746398926, "loss": 1.7439, "nll_loss": 0.3985658884048462, "rewards/accuracies": 1.0, "rewards/chosen": -0.13316413760185242, "rewards/margins": 0.0717153549194336, "rewards/rejected": -0.204879492521286, "step": 925 }, { "epoch": 2.4416611733684905, "grad_norm": 10.248364448547363, "learning_rate": 1.4977817213842058e-06, "log_odds_chosen": 0.604690670967102, "log_odds_ratio": -0.451438844203949, "logits/chosen": -1.1782548427581787, "logits/rejected": -1.0685997009277344, "logps/chosen": -1.2413527965545654, "logps/rejected": -1.7049214839935303, "loss": 2.5042, "nll_loss": 0.5809139013290405, "rewards/accuracies": 1.0, "rewards/chosen": -0.12413527816534042, "rewards/margins": 0.04635685682296753, "rewards/rejected": -0.17049214243888855, "step": 926 }, { "epoch": 2.4442979564930782, "grad_norm": 9.285633087158203, "learning_rate": 1.4906832298136645e-06, "log_odds_chosen": 0.8101198673248291, "log_odds_ratio": -0.37296995520591736, "logits/chosen": -1.0476787090301514, "logits/rejected": -0.9622210264205933, "logps/chosen": -1.233733892440796, "logps/rejected": -1.853700876235962, "loss": 1.8566, "nll_loss": 0.4268467426300049, "rewards/accuracies": 1.0, "rewards/chosen": -0.12337338924407959, "rewards/margins": 0.061996713280677795, "rewards/rejected": -0.1853700876235962, "step": 927 }, { "epoch": 2.4469347396176664, "grad_norm": 8.915876388549805, "learning_rate": 1.4835847382431232e-06, "log_odds_chosen": 0.36250048875808716, "log_odds_ratio": -0.5382492542266846, "logits/chosen": -1.1555805206298828, "logits/rejected": -1.116523027420044, "logps/chosen": -1.1413676738739014, "logps/rejected": -1.4002259969711304, "loss": 2.0615, "nll_loss": 0.4615498185157776, "rewards/accuracies": 0.875, "rewards/chosen": -0.11413675546646118, "rewards/margins": 0.025885840877890587, "rewards/rejected": -0.14002260565757751, "step": 928 }, { "epoch": 2.4495715227422545, "grad_norm": 10.278735160827637, "learning_rate": 1.4764862466725821e-06, "log_odds_chosen": 0.38574719429016113, "log_odds_ratio": -0.5276346802711487, "logits/chosen": -1.1740241050720215, "logits/rejected": -1.1242327690124512, "logps/chosen": -1.2782037258148193, "logps/rejected": -1.5531538724899292, "loss": 2.7706, "nll_loss": 0.639897882938385, "rewards/accuracies": 1.0, "rewards/chosen": -0.12782037258148193, "rewards/margins": 0.027495015412569046, "rewards/rejected": -0.15531539916992188, "step": 929 }, { "epoch": 2.4522083058668427, "grad_norm": 9.918184280395508, "learning_rate": 1.4693877551020408e-06, "log_odds_chosen": 0.4487338066101074, "log_odds_ratio": -0.5068457126617432, "logits/chosen": -1.1033174991607666, "logits/rejected": -1.0059428215026855, "logps/chosen": -1.403915524482727, "logps/rejected": -1.7551565170288086, "loss": 2.3905, "nll_loss": 0.5469300150871277, "rewards/accuracies": 0.875, "rewards/chosen": -0.14039155840873718, "rewards/margins": 0.035124097019433975, "rewards/rejected": -0.17551565170288086, "step": 930 }, { "epoch": 2.4548450889914304, "grad_norm": 8.942968368530273, "learning_rate": 1.4622892635314995e-06, "log_odds_chosen": 0.7661544680595398, "log_odds_ratio": -0.4091379940509796, "logits/chosen": -1.0771650075912476, "logits/rejected": -0.9967333078384399, "logps/chosen": -1.1602619886398315, "logps/rejected": -1.7546803951263428, "loss": 1.883, "nll_loss": 0.42984068393707275, "rewards/accuracies": 1.0, "rewards/chosen": -0.11602620780467987, "rewards/margins": 0.05944184586405754, "rewards/rejected": -0.1754680573940277, "step": 931 }, { "epoch": 2.4574818721160185, "grad_norm": 10.314979553222656, "learning_rate": 1.4551907719609582e-06, "log_odds_chosen": 0.9014593362808228, "log_odds_ratio": -0.3533499538898468, "logits/chosen": -1.0980167388916016, "logits/rejected": -0.9716736078262329, "logps/chosen": -1.2577919960021973, "logps/rejected": -1.9532355070114136, "loss": 2.1167, "nll_loss": 0.4938514828681946, "rewards/accuracies": 1.0, "rewards/chosen": -0.12577921152114868, "rewards/margins": 0.06954433768987656, "rewards/rejected": -0.19532354176044464, "step": 932 }, { "epoch": 2.4601186552406062, "grad_norm": 10.245210647583008, "learning_rate": 1.448092280390417e-06, "log_odds_chosen": 0.7105802893638611, "log_odds_ratio": -0.4090927541255951, "logits/chosen": -1.1053932905197144, "logits/rejected": -0.9996531009674072, "logps/chosen": -1.353420376777649, "logps/rejected": -1.9266541004180908, "loss": 2.277, "nll_loss": 0.5283440351486206, "rewards/accuracies": 1.0, "rewards/chosen": -0.1353420466184616, "rewards/margins": 0.05732336640357971, "rewards/rejected": -0.19266541302204132, "step": 933 }, { "epoch": 2.4627554383651944, "grad_norm": 10.484724998474121, "learning_rate": 1.4409937888198758e-06, "log_odds_chosen": 0.6949914693832397, "log_odds_ratio": -0.41040536761283875, "logits/chosen": -1.1644728183746338, "logits/rejected": -1.0214042663574219, "logps/chosen": -1.4014010429382324, "logps/rejected": -1.9624037742614746, "loss": 2.5328, "nll_loss": 0.5921629667282104, "rewards/accuracies": 1.0, "rewards/chosen": -0.1401400864124298, "rewards/margins": 0.0561002753674984, "rewards/rejected": -0.1962403804063797, "step": 934 }, { "epoch": 2.4653922214897825, "grad_norm": 9.716775894165039, "learning_rate": 1.4338952972493345e-06, "log_odds_chosen": 0.860659658908844, "log_odds_ratio": -0.37499281764030457, "logits/chosen": -1.0980603694915771, "logits/rejected": -1.0053725242614746, "logps/chosen": -1.1715384721755981, "logps/rejected": -1.8061293363571167, "loss": 1.8292, "nll_loss": 0.41981303691864014, "rewards/accuracies": 1.0, "rewards/chosen": -0.11715385317802429, "rewards/margins": 0.06345908343791962, "rewards/rejected": -0.18061292171478271, "step": 935 }, { "epoch": 2.4680290046143707, "grad_norm": 9.445055961608887, "learning_rate": 1.4267968056787931e-06, "log_odds_chosen": 0.34475648403167725, "log_odds_ratio": -0.5601778626441956, "logits/chosen": -1.0520339012145996, "logits/rejected": -0.9856969118118286, "logps/chosen": -1.2843670845031738, "logps/rejected": -1.5656042098999023, "loss": 2.1506, "nll_loss": 0.4816225469112396, "rewards/accuracies": 0.875, "rewards/chosen": -0.12843671441078186, "rewards/margins": 0.028123700991272926, "rewards/rejected": -0.15656042098999023, "step": 936 }, { "epoch": 2.4706657877389584, "grad_norm": 10.8469877243042, "learning_rate": 1.4196983141082518e-06, "log_odds_chosen": 0.5317217707633972, "log_odds_ratio": -0.4845069348812103, "logits/chosen": -1.1012637615203857, "logits/rejected": -1.030122995376587, "logps/chosen": -1.5236291885375977, "logps/rejected": -1.9571136236190796, "loss": 2.2323, "nll_loss": 0.5096133351325989, "rewards/accuracies": 0.875, "rewards/chosen": -0.1523628979921341, "rewards/margins": 0.04334845766425133, "rewards/rejected": -0.19571135938167572, "step": 937 }, { "epoch": 2.4733025708635465, "grad_norm": 9.446576118469238, "learning_rate": 1.4125998225377107e-06, "log_odds_chosen": 0.6366671323776245, "log_odds_ratio": -0.4362477660179138, "logits/chosen": -1.0753679275512695, "logits/rejected": -0.9698559045791626, "logps/chosen": -1.1360713243484497, "logps/rejected": -1.6101000308990479, "loss": 1.7274, "nll_loss": 0.38822537660598755, "rewards/accuracies": 0.875, "rewards/chosen": -0.11360714584589005, "rewards/margins": 0.04740285128355026, "rewards/rejected": -0.1610099971294403, "step": 938 }, { "epoch": 2.4759393539881343, "grad_norm": 10.09611988067627, "learning_rate": 1.4055013309671694e-06, "log_odds_chosen": 0.6295111179351807, "log_odds_ratio": -0.4323040843009949, "logits/chosen": -1.1586459875106812, "logits/rejected": -1.04908287525177, "logps/chosen": -1.2559092044830322, "logps/rejected": -1.7403661012649536, "loss": 2.2095, "nll_loss": 0.5091511607170105, "rewards/accuracies": 1.0, "rewards/chosen": -0.12559092044830322, "rewards/margins": 0.0484456792473793, "rewards/rejected": -0.17403662204742432, "step": 939 }, { "epoch": 2.4785761371127224, "grad_norm": 10.044174194335938, "learning_rate": 1.398402839396628e-06, "log_odds_chosen": 0.44056281447410583, "log_odds_ratio": -0.5164551734924316, "logits/chosen": -1.1366794109344482, "logits/rejected": -0.9838275909423828, "logps/chosen": -1.3997766971588135, "logps/rejected": -1.7464218139648438, "loss": 2.4201, "nll_loss": 0.5533714294433594, "rewards/accuracies": 0.75, "rewards/chosen": -0.13997766375541687, "rewards/margins": 0.03466450050473213, "rewards/rejected": -0.1746421754360199, "step": 940 }, { "epoch": 2.4812129202373105, "grad_norm": 10.043971061706543, "learning_rate": 1.3913043478260868e-06, "log_odds_chosen": 0.36631205677986145, "log_odds_ratio": -0.5362348556518555, "logits/chosen": -1.0773563385009766, "logits/rejected": -1.0182524919509888, "logps/chosen": -1.2820074558258057, "logps/rejected": -1.5590157508850098, "loss": 2.5162, "nll_loss": 0.5754203200340271, "rewards/accuracies": 1.0, "rewards/chosen": -0.12820076942443848, "rewards/margins": 0.027700817212462425, "rewards/rejected": -0.15590158104896545, "step": 941 }, { "epoch": 2.4838497033618987, "grad_norm": 10.077068328857422, "learning_rate": 1.3842058562555457e-06, "log_odds_chosen": 0.6632999777793884, "log_odds_ratio": -0.4329252243041992, "logits/chosen": -1.0947636365890503, "logits/rejected": -1.0356365442276, "logps/chosen": -1.3468356132507324, "logps/rejected": -1.8621776103973389, "loss": 2.2513, "nll_loss": 0.5195324420928955, "rewards/accuracies": 0.875, "rewards/chosen": -0.13468356430530548, "rewards/margins": 0.05153419449925423, "rewards/rejected": -0.1862177550792694, "step": 942 }, { "epoch": 2.4864864864864864, "grad_norm": 9.562148094177246, "learning_rate": 1.3771073646850044e-06, "log_odds_chosen": 1.0629949569702148, "log_odds_ratio": -0.38897430896759033, "logits/chosen": -1.144912600517273, "logits/rejected": -1.0449944734573364, "logps/chosen": -1.256197452545166, "logps/rejected": -2.1591362953186035, "loss": 2.062, "nll_loss": 0.4766029119491577, "rewards/accuracies": 1.0, "rewards/chosen": -0.12561975419521332, "rewards/margins": 0.09029386937618256, "rewards/rejected": -0.21591362357139587, "step": 943 }, { "epoch": 2.4891232696110746, "grad_norm": 9.22219467163086, "learning_rate": 1.370008873114463e-06, "log_odds_chosen": 0.456211119890213, "log_odds_ratio": -0.5188755989074707, "logits/chosen": -1.0710067749023438, "logits/rejected": -1.0109732151031494, "logps/chosen": -1.1019004583358765, "logps/rejected": -1.4601200819015503, "loss": 2.0044, "nll_loss": 0.44920358061790466, "rewards/accuracies": 0.875, "rewards/chosen": -0.11019004881381989, "rewards/margins": 0.03582196310162544, "rewards/rejected": -0.14601200819015503, "step": 944 }, { "epoch": 2.4917600527356623, "grad_norm": 10.367738723754883, "learning_rate": 1.3629103815439218e-06, "log_odds_chosen": 0.5476827025413513, "log_odds_ratio": -0.47168028354644775, "logits/chosen": -1.2086514234542847, "logits/rejected": -1.066209316253662, "logps/chosen": -1.3265548944473267, "logps/rejected": -1.7548052072525024, "loss": 2.438, "nll_loss": 0.5623201131820679, "rewards/accuracies": 0.875, "rewards/chosen": -0.13265548646450043, "rewards/margins": 0.042825035750865936, "rewards/rejected": -0.17548052966594696, "step": 945 }, { "epoch": 2.4943968358602504, "grad_norm": 10.300585746765137, "learning_rate": 1.3558118899733807e-06, "log_odds_chosen": 0.49440160393714905, "log_odds_ratio": -0.4904671013355255, "logits/chosen": -1.1634931564331055, "logits/rejected": -1.1151797771453857, "logps/chosen": -1.3160359859466553, "logps/rejected": -1.7080636024475098, "loss": 2.5215, "nll_loss": 0.5813170671463013, "rewards/accuracies": 1.0, "rewards/chosen": -0.13160359859466553, "rewards/margins": 0.03920276463031769, "rewards/rejected": -0.1708063781261444, "step": 946 }, { "epoch": 2.4970336189848386, "grad_norm": 9.261826515197754, "learning_rate": 1.3487133984028393e-06, "log_odds_chosen": 0.48070111870765686, "log_odds_ratio": -0.506763219833374, "logits/chosen": -1.059067964553833, "logits/rejected": -0.9914778470993042, "logps/chosen": -1.228057861328125, "logps/rejected": -1.629408836364746, "loss": 1.8701, "nll_loss": 0.4168543219566345, "rewards/accuracies": 0.875, "rewards/chosen": -0.12280578911304474, "rewards/margins": 0.040135085582733154, "rewards/rejected": -0.1629408746957779, "step": 947 }, { "epoch": 2.4996704021094267, "grad_norm": 9.628554344177246, "learning_rate": 1.3416149068322982e-06, "log_odds_chosen": 0.6004096269607544, "log_odds_ratio": -0.4586912989616394, "logits/chosen": -1.0332534313201904, "logits/rejected": -0.9923993945121765, "logps/chosen": -1.2210625410079956, "logps/rejected": -1.6863110065460205, "loss": 1.7498, "nll_loss": 0.3915759325027466, "rewards/accuracies": 0.875, "rewards/chosen": -0.12210625410079956, "rewards/margins": 0.04652484133839607, "rewards/rejected": -0.16863110661506653, "step": 948 }, { "epoch": 2.5023071852340144, "grad_norm": 9.986357688903809, "learning_rate": 1.3345164152617567e-06, "log_odds_chosen": 0.8193560838699341, "log_odds_ratio": -0.4735927879810333, "logits/chosen": -1.0893129110336304, "logits/rejected": -1.0309288501739502, "logps/chosen": -1.295593500137329, "logps/rejected": -1.9911038875579834, "loss": 1.9891, "nll_loss": 0.4499149024486542, "rewards/accuracies": 0.875, "rewards/chosen": -0.12955935299396515, "rewards/margins": 0.06955103576183319, "rewards/rejected": -0.19911038875579834, "step": 949 }, { "epoch": 2.5049439683586026, "grad_norm": 9.801891326904297, "learning_rate": 1.3274179236912156e-06, "log_odds_chosen": 0.6543738842010498, "log_odds_ratio": -0.438315212726593, "logits/chosen": -1.1201646327972412, "logits/rejected": -0.994601845741272, "logps/chosen": -1.2346127033233643, "logps/rejected": -1.7307636737823486, "loss": 2.2012, "nll_loss": 0.5064605474472046, "rewards/accuracies": 1.0, "rewards/chosen": -0.1234612762928009, "rewards/margins": 0.049615100026130676, "rewards/rejected": -0.17307636141777039, "step": 950 }, { "epoch": 2.5075807514831903, "grad_norm": 9.423513412475586, "learning_rate": 1.3203194321206743e-06, "log_odds_chosen": 0.7122058868408203, "log_odds_ratio": -0.41069626808166504, "logits/chosen": -1.0354974269866943, "logits/rejected": -0.953643262386322, "logps/chosen": -1.1242071390151978, "logps/rejected": -1.6442720890045166, "loss": 1.7784, "nll_loss": 0.40354055166244507, "rewards/accuracies": 1.0, "rewards/chosen": -0.1124207079410553, "rewards/margins": 0.052006494253873825, "rewards/rejected": -0.16442719101905823, "step": 951 }, { "epoch": 2.5102175346077784, "grad_norm": 9.538496017456055, "learning_rate": 1.3132209405501332e-06, "log_odds_chosen": 0.7136922478675842, "log_odds_ratio": -0.41526490449905396, "logits/chosen": -1.1126666069030762, "logits/rejected": -0.994556725025177, "logps/chosen": -1.1598321199417114, "logps/rejected": -1.6915475130081177, "loss": 1.8733, "nll_loss": 0.4268023371696472, "rewards/accuracies": 0.875, "rewards/chosen": -0.11598322540521622, "rewards/margins": 0.053171537816524506, "rewards/rejected": -0.16915476322174072, "step": 952 }, { "epoch": 2.5128543177323666, "grad_norm": 9.990797996520996, "learning_rate": 1.3061224489795917e-06, "log_odds_chosen": 0.5567875504493713, "log_odds_ratio": -0.47149914503097534, "logits/chosen": -1.1117167472839355, "logits/rejected": -1.0665760040283203, "logps/chosen": -1.40949285030365, "logps/rejected": -1.861353874206543, "loss": 2.4129, "nll_loss": 0.5560762882232666, "rewards/accuracies": 0.875, "rewards/chosen": -0.1409492939710617, "rewards/margins": 0.045186106115579605, "rewards/rejected": -0.186135396361351, "step": 953 }, { "epoch": 2.5154911008569547, "grad_norm": 10.886003494262695, "learning_rate": 1.2990239574090504e-06, "log_odds_chosen": 0.585273265838623, "log_odds_ratio": -0.4497223198413849, "logits/chosen": -1.2316553592681885, "logits/rejected": -1.067345380783081, "logps/chosen": -1.340728759765625, "logps/rejected": -1.8043076992034912, "loss": 2.6535, "nll_loss": 0.6184085607528687, "rewards/accuracies": 1.0, "rewards/chosen": -0.1340728998184204, "rewards/margins": 0.0463578887283802, "rewards/rejected": -0.18043076992034912, "step": 954 }, { "epoch": 2.5181278839815424, "grad_norm": 11.22103214263916, "learning_rate": 1.2919254658385093e-06, "log_odds_chosen": 0.46139469742774963, "log_odds_ratio": -0.5149021744728088, "logits/chosen": -1.1275732517242432, "logits/rejected": -1.0286986827850342, "logps/chosen": -1.507557988166809, "logps/rejected": -1.8830695152282715, "loss": 2.4608, "nll_loss": 0.5636999607086182, "rewards/accuracies": 0.75, "rewards/chosen": -0.15075579285621643, "rewards/margins": 0.0375511571764946, "rewards/rejected": -0.18830695748329163, "step": 955 }, { "epoch": 2.5207646671061306, "grad_norm": 10.8544921875, "learning_rate": 1.284826974267968e-06, "log_odds_chosen": 0.5111101865768433, "log_odds_ratio": -0.4778319001197815, "logits/chosen": -1.109304428100586, "logits/rejected": -0.9567216038703918, "logps/chosen": -1.4662706851959229, "logps/rejected": -1.87843656539917, "loss": 2.3825, "nll_loss": 0.5478419661521912, "rewards/accuracies": 1.0, "rewards/chosen": -0.14662706851959229, "rewards/margins": 0.04121658578515053, "rewards/rejected": -0.18784365057945251, "step": 956 }, { "epoch": 2.5234014502307183, "grad_norm": 9.740726470947266, "learning_rate": 1.2777284826974269e-06, "log_odds_chosen": 0.6970908045768738, "log_odds_ratio": -0.41333848237991333, "logits/chosen": -1.038455843925476, "logits/rejected": -0.9585951566696167, "logps/chosen": -1.2818219661712646, "logps/rejected": -1.8254046440124512, "loss": 1.9038, "nll_loss": 0.43462392687797546, "rewards/accuracies": 1.0, "rewards/chosen": -0.12818220257759094, "rewards/margins": 0.05435826629400253, "rewards/rejected": -0.18254047632217407, "step": 957 }, { "epoch": 2.5260382333553064, "grad_norm": 10.290287971496582, "learning_rate": 1.2706299911268853e-06, "log_odds_chosen": 0.6766616106033325, "log_odds_ratio": -0.42089855670928955, "logits/chosen": -1.1274069547653198, "logits/rejected": -0.9903121590614319, "logps/chosen": -1.4189093112945557, "logps/rejected": -1.977081060409546, "loss": 2.3508, "nll_loss": 0.5456117987632751, "rewards/accuracies": 1.0, "rewards/chosen": -0.14189092814922333, "rewards/margins": 0.055817171931266785, "rewards/rejected": -0.1977081000804901, "step": 958 }, { "epoch": 2.5286750164798946, "grad_norm": 10.350006103515625, "learning_rate": 1.2635314995563442e-06, "log_odds_chosen": 0.7320818305015564, "log_odds_ratio": -0.49115613102912903, "logits/chosen": -1.0401033163070679, "logits/rejected": -1.0141887664794922, "logps/chosen": -1.305769443511963, "logps/rejected": -1.9269342422485352, "loss": 2.0069, "nll_loss": 0.4525972008705139, "rewards/accuracies": 0.875, "rewards/chosen": -0.1305769383907318, "rewards/margins": 0.06211646646261215, "rewards/rejected": -0.19269341230392456, "step": 959 }, { "epoch": 2.5313117996044827, "grad_norm": 10.294855117797852, "learning_rate": 1.256433007985803e-06, "log_odds_chosen": 0.5948234796524048, "log_odds_ratio": -0.46262726187705994, "logits/chosen": -1.1262372732162476, "logits/rejected": -1.0458968877792358, "logps/chosen": -1.3608264923095703, "logps/rejected": -1.8277140855789185, "loss": 2.3149, "nll_loss": 0.5324715971946716, "rewards/accuracies": 0.875, "rewards/chosen": -0.13608264923095703, "rewards/margins": 0.046688761562108994, "rewards/rejected": -0.18277139961719513, "step": 960 }, { "epoch": 2.5339485827290704, "grad_norm": 9.843774795532227, "learning_rate": 1.2493345164152618e-06, "log_odds_chosen": 0.39834561944007874, "log_odds_ratio": -0.5356795191764832, "logits/chosen": -1.1416336297988892, "logits/rejected": -1.0767290592193604, "logps/chosen": -1.345848560333252, "logps/rejected": -1.6646955013275146, "loss": 2.4462, "nll_loss": 0.5579822659492493, "rewards/accuracies": 0.875, "rewards/chosen": -0.13458484411239624, "rewards/margins": 0.031884703785181046, "rewards/rejected": -0.166469544172287, "step": 961 }, { "epoch": 2.5365853658536586, "grad_norm": 10.40951919555664, "learning_rate": 1.2422360248447203e-06, "log_odds_chosen": 0.6294993162155151, "log_odds_ratio": -0.4509700536727905, "logits/chosen": -1.213744044303894, "logits/rejected": -1.1220805644989014, "logps/chosen": -1.324014663696289, "logps/rejected": -1.8238645792007446, "loss": 2.3754, "nll_loss": 0.5487492084503174, "rewards/accuracies": 1.0, "rewards/chosen": -0.1324014663696289, "rewards/margins": 0.04998498782515526, "rewards/rejected": -0.18238645792007446, "step": 962 }, { "epoch": 2.5392221489782463, "grad_norm": 9.549568176269531, "learning_rate": 1.2351375332741792e-06, "log_odds_chosen": 0.6109352111816406, "log_odds_ratio": -0.4740837514400482, "logits/chosen": -1.0815215110778809, "logits/rejected": -0.9629336595535278, "logps/chosen": -1.2634634971618652, "logps/rejected": -1.7190935611724854, "loss": 1.9363, "nll_loss": 0.4366764724254608, "rewards/accuracies": 0.75, "rewards/chosen": -0.12634634971618652, "rewards/margins": 0.04556302726268768, "rewards/rejected": -0.171909362077713, "step": 963 }, { "epoch": 2.5418589321028344, "grad_norm": 9.77994441986084, "learning_rate": 1.2280390417036379e-06, "log_odds_chosen": 0.46185052394866943, "log_odds_ratio": -0.5055705904960632, "logits/chosen": -1.1587986946105957, "logits/rejected": -1.0876436233520508, "logps/chosen": -1.1706972122192383, "logps/rejected": -1.5135177373886108, "loss": 2.0106, "nll_loss": 0.4520954489707947, "rewards/accuracies": 0.875, "rewards/chosen": -0.11706972867250443, "rewards/margins": 0.03428204730153084, "rewards/rejected": -0.15135176479816437, "step": 964 }, { "epoch": 2.5444957152274226, "grad_norm": 10.625575065612793, "learning_rate": 1.2209405501330968e-06, "log_odds_chosen": 0.8708397150039673, "log_odds_ratio": -0.37743672728538513, "logits/chosen": -1.0998449325561523, "logits/rejected": -1.0114262104034424, "logps/chosen": -1.4298604726791382, "logps/rejected": -2.1550512313842773, "loss": 2.1465, "nll_loss": 0.49889129400253296, "rewards/accuracies": 1.0, "rewards/chosen": -0.14298602938652039, "rewards/margins": 0.07251909375190735, "rewards/rejected": -0.21550512313842773, "step": 965 }, { "epoch": 2.5471324983520107, "grad_norm": 9.72364330291748, "learning_rate": 1.2138420585625555e-06, "log_odds_chosen": 0.5993391871452332, "log_odds_ratio": -0.4506445825099945, "logits/chosen": -1.15058171749115, "logits/rejected": -1.0429490804672241, "logps/chosen": -1.2814643383026123, "logps/rejected": -1.7237976789474487, "loss": 2.4773, "nll_loss": 0.5742565393447876, "rewards/accuracies": 1.0, "rewards/chosen": -0.1281464397907257, "rewards/margins": 0.04423331469297409, "rewards/rejected": -0.1723797619342804, "step": 966 }, { "epoch": 2.5497692814765984, "grad_norm": 9.3811616897583, "learning_rate": 1.2067435669920142e-06, "log_odds_chosen": 0.8285520672798157, "log_odds_ratio": -0.46860355138778687, "logits/chosen": -1.133819818496704, "logits/rejected": -1.083085536956787, "logps/chosen": -1.2597310543060303, "logps/rejected": -1.9405475854873657, "loss": 2.148, "nll_loss": 0.49013227224349976, "rewards/accuracies": 1.0, "rewards/chosen": -0.12597310543060303, "rewards/margins": 0.06808167695999146, "rewards/rejected": -0.19405478239059448, "step": 967 }, { "epoch": 2.5524060646011866, "grad_norm": 9.492497444152832, "learning_rate": 1.1996450754214728e-06, "log_odds_chosen": 0.5317522287368774, "log_odds_ratio": -0.4742090702056885, "logits/chosen": -1.0644326210021973, "logits/rejected": -1.0012681484222412, "logps/chosen": -1.2771978378295898, "logps/rejected": -1.6803510189056396, "loss": 1.9058, "nll_loss": 0.42904096841812134, "rewards/accuracies": 1.0, "rewards/chosen": -0.12771978974342346, "rewards/margins": 0.04031532257795334, "rewards/rejected": -0.1680351048707962, "step": 968 }, { "epoch": 2.5550428477257743, "grad_norm": 10.43970775604248, "learning_rate": 1.1925465838509317e-06, "log_odds_chosen": 0.3138500154018402, "log_odds_ratio": -0.5608047246932983, "logits/chosen": -1.1299519538879395, "logits/rejected": -1.0770107507705688, "logps/chosen": -1.4027135372161865, "logps/rejected": -1.6526198387145996, "loss": 2.7312, "nll_loss": 0.6267147660255432, "rewards/accuracies": 0.875, "rewards/chosen": -0.1402713656425476, "rewards/margins": 0.024990636855363846, "rewards/rejected": -0.16526201367378235, "step": 969 }, { "epoch": 2.5576796308503624, "grad_norm": 10.022076606750488, "learning_rate": 1.1854480922803904e-06, "log_odds_chosen": 0.7182021737098694, "log_odds_ratio": -0.4174467921257019, "logits/chosen": -1.108182668685913, "logits/rejected": -1.0129443407058716, "logps/chosen": -1.3148906230926514, "logps/rejected": -1.883524775505066, "loss": 2.5325, "nll_loss": 0.591376781463623, "rewards/accuracies": 1.0, "rewards/chosen": -0.13148906826972961, "rewards/margins": 0.05686340853571892, "rewards/rejected": -0.18835246562957764, "step": 970 }, { "epoch": 2.5603164139749506, "grad_norm": 9.961695671081543, "learning_rate": 1.178349600709849e-06, "log_odds_chosen": 0.7222421765327454, "log_odds_ratio": -0.46626126766204834, "logits/chosen": -1.1342153549194336, "logits/rejected": -1.0204015970230103, "logps/chosen": -1.2801722288131714, "logps/rejected": -1.8723406791687012, "loss": 2.3123, "nll_loss": 0.5314509272575378, "rewards/accuracies": 0.875, "rewards/chosen": -0.12801721692085266, "rewards/margins": 0.05921683460474014, "rewards/rejected": -0.1872340738773346, "step": 971 }, { "epoch": 2.5629531970995387, "grad_norm": 10.804487228393555, "learning_rate": 1.1712511091393078e-06, "log_odds_chosen": 0.3321448564529419, "log_odds_ratio": -0.5600012540817261, "logits/chosen": -1.1126806735992432, "logits/rejected": -1.0470176935195923, "logps/chosen": -1.4884405136108398, "logps/rejected": -1.739980697631836, "loss": 2.9032, "nll_loss": 0.6698001623153687, "rewards/accuracies": 0.875, "rewards/chosen": -0.14884406328201294, "rewards/margins": 0.02515401318669319, "rewards/rejected": -0.17399807274341583, "step": 972 }, { "epoch": 2.5655899802241264, "grad_norm": 10.045714378356934, "learning_rate": 1.1641526175687665e-06, "log_odds_chosen": 0.5490153431892395, "log_odds_ratio": -0.46695026755332947, "logits/chosen": -1.1155519485473633, "logits/rejected": -1.0676300525665283, "logps/chosen": -1.1715837717056274, "logps/rejected": -1.5821819305419922, "loss": 2.3139, "nll_loss": 0.5317729115486145, "rewards/accuracies": 0.875, "rewards/chosen": -0.11715838313102722, "rewards/margins": 0.04105980694293976, "rewards/rejected": -0.15821819007396698, "step": 973 }, { "epoch": 2.5682267633487146, "grad_norm": 10.197787284851074, "learning_rate": 1.1570541259982254e-06, "log_odds_chosen": 0.8162627220153809, "log_odds_ratio": -0.40598583221435547, "logits/chosen": -1.1592998504638672, "logits/rejected": -1.0291635990142822, "logps/chosen": -1.3154046535491943, "logps/rejected": -1.9530718326568604, "loss": 2.2154, "nll_loss": 0.5132494568824768, "rewards/accuracies": 0.875, "rewards/chosen": -0.1315404623746872, "rewards/margins": 0.0637667179107666, "rewards/rejected": -0.1953071802854538, "step": 974 }, { "epoch": 2.5708635464733027, "grad_norm": 10.279739379882812, "learning_rate": 1.149955634427684e-06, "log_odds_chosen": 0.480248361825943, "log_odds_ratio": -0.49725350737571716, "logits/chosen": -1.1203275918960571, "logits/rejected": -1.0338075160980225, "logps/chosen": -1.4031107425689697, "logps/rejected": -1.7773079872131348, "loss": 2.8042, "nll_loss": 0.6513198614120483, "rewards/accuracies": 0.875, "rewards/chosen": -0.1403110772371292, "rewards/margins": 0.03741970658302307, "rewards/rejected": -0.17773079872131348, "step": 975 }, { "epoch": 2.5735003295978904, "grad_norm": 9.14466667175293, "learning_rate": 1.1428571428571428e-06, "log_odds_chosen": 0.6685113310813904, "log_odds_ratio": -0.4547085165977478, "logits/chosen": -1.0525332689285278, "logits/rejected": -0.9893536567687988, "logps/chosen": -1.080911636352539, "logps/rejected": -1.4747405052185059, "loss": 1.7507, "nll_loss": 0.3922029733657837, "rewards/accuracies": 0.75, "rewards/chosen": -0.10809117555618286, "rewards/margins": 0.03938288241624832, "rewards/rejected": -0.14747406542301178, "step": 976 }, { "epoch": 2.5761371127224786, "grad_norm": 10.132596969604492, "learning_rate": 1.1357586512866015e-06, "log_odds_chosen": 0.6782791018486023, "log_odds_ratio": -0.4253457188606262, "logits/chosen": -1.107090950012207, "logits/rejected": -1.0170270204544067, "logps/chosen": -1.1491903066635132, "logps/rejected": -1.6603305339813232, "loss": 1.968, "nll_loss": 0.4494568109512329, "rewards/accuracies": 1.0, "rewards/chosen": -0.1149190291762352, "rewards/margins": 0.051114022731781006, "rewards/rejected": -0.1660330444574356, "step": 977 }, { "epoch": 2.5787738958470667, "grad_norm": 10.886836051940918, "learning_rate": 1.1286601597160604e-06, "log_odds_chosen": 0.6357942223548889, "log_odds_ratio": -0.45238715410232544, "logits/chosen": -1.1401660442352295, "logits/rejected": -1.0261318683624268, "logps/chosen": -1.4753997325897217, "logps/rejected": -2.0056228637695312, "loss": 2.4576, "nll_loss": 0.5691531896591187, "rewards/accuracies": 1.0, "rewards/chosen": -0.14753997325897217, "rewards/margins": 0.05302230268716812, "rewards/rejected": -0.20056229829788208, "step": 978 }, { "epoch": 2.5814106789716544, "grad_norm": 9.240728378295898, "learning_rate": 1.121561668145519e-06, "log_odds_chosen": 0.37126636505126953, "log_odds_ratio": -0.5295717716217041, "logits/chosen": -1.1164339780807495, "logits/rejected": -1.0501753091812134, "logps/chosen": -1.0403053760528564, "logps/rejected": -1.2815606594085693, "loss": 1.9984, "nll_loss": 0.44663843512535095, "rewards/accuracies": 1.0, "rewards/chosen": -0.104030542075634, "rewards/margins": 0.024125533178448677, "rewards/rejected": -0.12815608084201813, "step": 979 }, { "epoch": 2.5840474620962426, "grad_norm": 9.89041519165039, "learning_rate": 1.1144631765749777e-06, "log_odds_chosen": 0.46538037061691284, "log_odds_ratio": -0.4960182309150696, "logits/chosen": -1.1385443210601807, "logits/rejected": -1.0360300540924072, "logps/chosen": -1.3127050399780273, "logps/rejected": -1.6616865396499634, "loss": 2.3062, "nll_loss": 0.5269367098808289, "rewards/accuracies": 1.0, "rewards/chosen": -0.13127049803733826, "rewards/margins": 0.034898146986961365, "rewards/rejected": -0.16616865992546082, "step": 980 }, { "epoch": 2.5866842452208307, "grad_norm": 10.444801330566406, "learning_rate": 1.1073646850044364e-06, "log_odds_chosen": 0.5480303764343262, "log_odds_ratio": -0.4696999490261078, "logits/chosen": -1.195300817489624, "logits/rejected": -1.084096074104309, "logps/chosen": -1.353794813156128, "logps/rejected": -1.7875046730041504, "loss": 2.5014, "nll_loss": 0.5783883333206177, "rewards/accuracies": 1.0, "rewards/chosen": -0.13537949323654175, "rewards/margins": 0.043370962142944336, "rewards/rejected": -0.17875047028064728, "step": 981 }, { "epoch": 2.5893210283454184, "grad_norm": 10.700628280639648, "learning_rate": 1.1002661934338953e-06, "log_odds_chosen": 0.28562530875205994, "log_odds_ratio": -0.5658694505691528, "logits/chosen": -1.1571452617645264, "logits/rejected": -1.0048047304153442, "logps/chosen": -1.4264452457427979, "logps/rejected": -1.650299310684204, "loss": 3.1581, "nll_loss": 0.7329351902008057, "rewards/accuracies": 0.875, "rewards/chosen": -0.14264453947544098, "rewards/margins": 0.02238539233803749, "rewards/rejected": -0.16502991318702698, "step": 982 }, { "epoch": 2.5919578114700066, "grad_norm": 9.911328315734863, "learning_rate": 1.093167701863354e-06, "log_odds_chosen": 0.305277943611145, "log_odds_ratio": -0.5615626573562622, "logits/chosen": -1.147043228149414, "logits/rejected": -1.0166703462600708, "logps/chosen": -1.2626628875732422, "logps/rejected": -1.482170581817627, "loss": 2.2149, "nll_loss": 0.4975632131099701, "rewards/accuracies": 0.75, "rewards/chosen": -0.12626628577709198, "rewards/margins": 0.021950792521238327, "rewards/rejected": -0.1482170820236206, "step": 983 }, { "epoch": 2.5945945945945947, "grad_norm": 10.08964729309082, "learning_rate": 1.0860692102928127e-06, "log_odds_chosen": 0.601814866065979, "log_odds_ratio": -0.44974032044410706, "logits/chosen": -1.1483750343322754, "logits/rejected": -1.0418391227722168, "logps/chosen": -1.345975637435913, "logps/rejected": -1.8269295692443848, "loss": 2.444, "nll_loss": 0.5660194158554077, "rewards/accuracies": 0.875, "rewards/chosen": -0.1345975697040558, "rewards/margins": 0.04809538275003433, "rewards/rejected": -0.18269294500350952, "step": 984 }, { "epoch": 2.5972313777191824, "grad_norm": 10.545869827270508, "learning_rate": 1.0789707187222714e-06, "log_odds_chosen": 0.6214417219161987, "log_odds_ratio": -0.45351165533065796, "logits/chosen": -1.169751763343811, "logits/rejected": -1.0490249395370483, "logps/chosen": -1.2687312364578247, "logps/rejected": -1.7470698356628418, "loss": 2.3, "nll_loss": 0.5296406745910645, "rewards/accuracies": 0.875, "rewards/chosen": -0.12687312066555023, "rewards/margins": 0.04783385992050171, "rewards/rejected": -0.17470699548721313, "step": 985 }, { "epoch": 2.5998681608437706, "grad_norm": 9.85431957244873, "learning_rate": 1.0718722271517303e-06, "log_odds_chosen": 0.7516704797744751, "log_odds_ratio": -0.4033581614494324, "logits/chosen": -1.0369821786880493, "logits/rejected": -0.9772351384162903, "logps/chosen": -1.2994555234909058, "logps/rejected": -1.9012876749038696, "loss": 2.2013, "nll_loss": 0.5099843740463257, "rewards/accuracies": 1.0, "rewards/chosen": -0.1299455612897873, "rewards/margins": 0.060183219611644745, "rewards/rejected": -0.19012877345085144, "step": 986 }, { "epoch": 2.6025049439683587, "grad_norm": 9.786620140075684, "learning_rate": 1.064773735581189e-06, "log_odds_chosen": 0.8149322271347046, "log_odds_ratio": -0.3785549998283386, "logits/chosen": -1.0241363048553467, "logits/rejected": -0.9378775358200073, "logps/chosen": -1.2854642868041992, "logps/rejected": -1.9337173700332642, "loss": 2.0159, "nll_loss": 0.46613043546676636, "rewards/accuracies": 1.0, "rewards/chosen": -0.12854641675949097, "rewards/margins": 0.06482531130313873, "rewards/rejected": -0.1933717429637909, "step": 987 }, { "epoch": 2.6051417270929464, "grad_norm": 8.763384819030762, "learning_rate": 1.0576752440106479e-06, "log_odds_chosen": 1.1993224620819092, "log_odds_ratio": -0.2995642125606537, "logits/chosen": -1.037737250328064, "logits/rejected": -0.991753101348877, "logps/chosen": -1.0106923580169678, "logps/rejected": -1.905373454093933, "loss": 1.4312, "nll_loss": 0.32785069942474365, "rewards/accuracies": 1.0, "rewards/chosen": -0.10106924176216125, "rewards/margins": 0.0894681066274643, "rewards/rejected": -0.19053736329078674, "step": 988 }, { "epoch": 2.6077785102175346, "grad_norm": 9.29425048828125, "learning_rate": 1.0505767524401064e-06, "log_odds_chosen": 0.5740713477134705, "log_odds_ratio": -0.4664483070373535, "logits/chosen": -1.084150791168213, "logits/rejected": -0.9877736568450928, "logps/chosen": -1.2419626712799072, "logps/rejected": -1.6699585914611816, "loss": 1.8763, "nll_loss": 0.4224218726158142, "rewards/accuracies": 0.875, "rewards/chosen": -0.12419626116752625, "rewards/margins": 0.04279961809515953, "rewards/rejected": -0.16699588298797607, "step": 989 }, { "epoch": 2.6104152933421227, "grad_norm": 10.544453620910645, "learning_rate": 1.043478260869565e-06, "log_odds_chosen": 0.6520025730133057, "log_odds_ratio": -0.4675554931163788, "logits/chosen": -1.2331300973892212, "logits/rejected": -1.0616133213043213, "logps/chosen": -1.280714511871338, "logps/rejected": -1.8071590662002563, "loss": 2.3621, "nll_loss": 0.5437787175178528, "rewards/accuracies": 0.875, "rewards/chosen": -0.12807147204875946, "rewards/margins": 0.05264444649219513, "rewards/rejected": -0.1807159185409546, "step": 990 }, { "epoch": 2.6130520764667104, "grad_norm": 9.320026397705078, "learning_rate": 1.036379769299024e-06, "log_odds_chosen": 0.7140133380889893, "log_odds_ratio": -0.41474878787994385, "logits/chosen": -1.0829880237579346, "logits/rejected": -1.0305976867675781, "logps/chosen": -1.2512176036834717, "logps/rejected": -1.7967097759246826, "loss": 2.1951, "nll_loss": 0.5073036551475525, "rewards/accuracies": 1.0, "rewards/chosen": -0.12512175738811493, "rewards/margins": 0.05454923212528229, "rewards/rejected": -0.17967098951339722, "step": 991 }, { "epoch": 2.6156888595912986, "grad_norm": 9.70067024230957, "learning_rate": 1.0292812777284826e-06, "log_odds_chosen": 0.7287517786026001, "log_odds_ratio": -0.40351107716560364, "logits/chosen": -1.157404899597168, "logits/rejected": -1.0376935005187988, "logps/chosen": -1.2435426712036133, "logps/rejected": -1.8053914308547974, "loss": 2.1765, "nll_loss": 0.503783106803894, "rewards/accuracies": 1.0, "rewards/chosen": -0.1243542730808258, "rewards/margins": 0.05618486925959587, "rewards/rejected": -0.18053914606571198, "step": 992 }, { "epoch": 2.6183256427158867, "grad_norm": 9.9812593460083, "learning_rate": 1.0221827861579413e-06, "log_odds_chosen": 0.5076786875724792, "log_odds_ratio": -0.4871475100517273, "logits/chosen": -1.1644964218139648, "logits/rejected": -1.074755311012268, "logps/chosen": -1.2209006547927856, "logps/rejected": -1.6116774082183838, "loss": 2.2044, "nll_loss": 0.502373993396759, "rewards/accuracies": 1.0, "rewards/chosen": -0.12209007143974304, "rewards/margins": 0.03907766565680504, "rewards/rejected": -0.16116774082183838, "step": 993 }, { "epoch": 2.6209624258404745, "grad_norm": 10.192991256713867, "learning_rate": 1.0150842945874e-06, "log_odds_chosen": 0.8125649690628052, "log_odds_ratio": -0.41364622116088867, "logits/chosen": -1.1938300132751465, "logits/rejected": -1.0928840637207031, "logps/chosen": -1.1739590167999268, "logps/rejected": -1.7607444524765015, "loss": 2.5602, "nll_loss": 0.5986862182617188, "rewards/accuracies": 1.0, "rewards/chosen": -0.11739590018987656, "rewards/margins": 0.05867853760719299, "rewards/rejected": -0.17607444524765015, "step": 994 }, { "epoch": 2.6235992089650626, "grad_norm": 9.447900772094727, "learning_rate": 1.007985803016859e-06, "log_odds_chosen": 0.7173506617546082, "log_odds_ratio": -0.4152427315711975, "logits/chosen": -1.109144926071167, "logits/rejected": -1.0027085542678833, "logps/chosen": -1.1777777671813965, "logps/rejected": -1.7166709899902344, "loss": 1.954, "nll_loss": 0.4469866454601288, "rewards/accuracies": 1.0, "rewards/chosen": -0.11777777969837189, "rewards/margins": 0.05388931185007095, "rewards/rejected": -0.17166709899902344, "step": 995 }, { "epoch": 2.6262359920896508, "grad_norm": 10.02701473236084, "learning_rate": 1.0008873114463176e-06, "log_odds_chosen": 0.5110893249511719, "log_odds_ratio": -0.4991420805454254, "logits/chosen": -1.1886515617370605, "logits/rejected": -1.072214126586914, "logps/chosen": -1.2944159507751465, "logps/rejected": -1.6956514120101929, "loss": 2.3982, "nll_loss": 0.5496328473091125, "rewards/accuracies": 0.875, "rewards/chosen": -0.12944158911705017, "rewards/margins": 0.040123552083969116, "rewards/rejected": -0.1695651412010193, "step": 996 }, { "epoch": 2.628872775214239, "grad_norm": 10.027898788452148, "learning_rate": 9.937888198757763e-07, "log_odds_chosen": 0.6266584992408752, "log_odds_ratio": -0.4440193176269531, "logits/chosen": -1.1369056701660156, "logits/rejected": -1.0225248336791992, "logps/chosen": -1.290299415588379, "logps/rejected": -1.7772315740585327, "loss": 2.1245, "nll_loss": 0.48673126101493835, "rewards/accuracies": 1.0, "rewards/chosen": -0.12902995944023132, "rewards/margins": 0.048693202435970306, "rewards/rejected": -0.17772315442562103, "step": 997 }, { "epoch": 2.6315095583388266, "grad_norm": 9.200778007507324, "learning_rate": 9.866903283052352e-07, "log_odds_chosen": 0.43737131357192993, "log_odds_ratio": -0.5192932486534119, "logits/chosen": -1.0621488094329834, "logits/rejected": -0.9878532886505127, "logps/chosen": -1.3959318399429321, "logps/rejected": -1.7320349216461182, "loss": 2.0259, "nll_loss": 0.4545466899871826, "rewards/accuracies": 0.75, "rewards/chosen": -0.1395931839942932, "rewards/margins": 0.033610306680202484, "rewards/rejected": -0.1732034981250763, "step": 998 }, { "epoch": 2.6341463414634148, "grad_norm": 9.254485130310059, "learning_rate": 9.795918367346939e-07, "log_odds_chosen": 0.9543919563293457, "log_odds_ratio": -0.3626548647880554, "logits/chosen": -1.090482473373413, "logits/rejected": -1.0068384408950806, "logps/chosen": -1.0385923385620117, "logps/rejected": -1.72948157787323, "loss": 1.6749, "nll_loss": 0.3824600577354431, "rewards/accuracies": 0.875, "rewards/chosen": -0.10385924577713013, "rewards/margins": 0.06908892095088959, "rewards/rejected": -0.17294815182685852, "step": 999 }, { "epoch": 2.6367831245880025, "grad_norm": 10.900224685668945, "learning_rate": 9.724933451641526e-07, "log_odds_chosen": 0.4607086181640625, "log_odds_ratio": -0.5145030617713928, "logits/chosen": -1.1873807907104492, "logits/rejected": -1.0615363121032715, "logps/chosen": -1.4615896940231323, "logps/rejected": -1.8402800559997559, "loss": 2.5449, "nll_loss": 0.5847799777984619, "rewards/accuracies": 0.875, "rewards/chosen": -0.14615896344184875, "rewards/margins": 0.03786905109882355, "rewards/rejected": -0.1840280294418335, "step": 1000 }, { "epoch": 2.6394199077125906, "grad_norm": 10.294342994689941, "learning_rate": 9.653948535936112e-07, "log_odds_chosen": 0.5043836832046509, "log_odds_ratio": -0.48049962520599365, "logits/chosen": -1.198920726776123, "logits/rejected": -1.0782759189605713, "logps/chosen": -1.3148038387298584, "logps/rejected": -1.7069685459136963, "loss": 2.8996, "nll_loss": 0.6768454313278198, "rewards/accuracies": 0.875, "rewards/chosen": -0.1314803808927536, "rewards/margins": 0.03921646997332573, "rewards/rejected": -0.17069685459136963, "step": 1001 }, { "epoch": 2.6420566908371788, "grad_norm": 9.512928009033203, "learning_rate": 9.582963620230701e-07, "log_odds_chosen": 0.484348326921463, "log_odds_ratio": -0.4980863034725189, "logits/chosen": -1.150258183479309, "logits/rejected": -1.1008179187774658, "logps/chosen": -1.1577883958816528, "logps/rejected": -1.5190094709396362, "loss": 1.9338, "nll_loss": 0.43364277482032776, "rewards/accuracies": 1.0, "rewards/chosen": -0.115778848528862, "rewards/margins": 0.03612210601568222, "rewards/rejected": -0.15190094709396362, "step": 1002 }, { "epoch": 2.644693473961767, "grad_norm": 9.940478324890137, "learning_rate": 9.511978704525287e-07, "log_odds_chosen": 0.40882444381713867, "log_odds_ratio": -0.5205907821655273, "logits/chosen": -1.0754611492156982, "logits/rejected": -0.9930179119110107, "logps/chosen": -1.2754976749420166, "logps/rejected": -1.587766408920288, "loss": 2.0367, "nll_loss": 0.45710575580596924, "rewards/accuracies": 0.875, "rewards/chosen": -0.12754976749420166, "rewards/margins": 0.03122686967253685, "rewards/rejected": -0.1587766408920288, "step": 1003 }, { "epoch": 2.6473302570863546, "grad_norm": 8.932833671569824, "learning_rate": 9.440993788819875e-07, "log_odds_chosen": 0.5635305047035217, "log_odds_ratio": -0.4637274146080017, "logits/chosen": -1.158718228340149, "logits/rejected": -1.0395697355270386, "logps/chosen": -1.109891653060913, "logps/rejected": -1.51375150680542, "loss": 1.881, "nll_loss": 0.42386823892593384, "rewards/accuracies": 1.0, "rewards/chosen": -0.11098916828632355, "rewards/margins": 0.04038599506020546, "rewards/rejected": -0.1513751745223999, "step": 1004 }, { "epoch": 2.6499670402109428, "grad_norm": 10.461935043334961, "learning_rate": 9.370008873114462e-07, "log_odds_chosen": 0.6628550291061401, "log_odds_ratio": -0.4403079152107239, "logits/chosen": -1.0836372375488281, "logits/rejected": -1.057179570198059, "logps/chosen": -1.1715991497039795, "logps/rejected": -1.6733651161193848, "loss": 1.9231, "nll_loss": 0.43674010038375854, "rewards/accuracies": 0.875, "rewards/chosen": -0.11715991795063019, "rewards/margins": 0.05017659068107605, "rewards/rejected": -0.16733650863170624, "step": 1005 }, { "epoch": 2.6526038233355305, "grad_norm": 10.285652160644531, "learning_rate": 9.29902395740905e-07, "log_odds_chosen": 0.4264982342720032, "log_odds_ratio": -0.5198737382888794, "logits/chosen": -1.1921157836914062, "logits/rejected": -1.128915548324585, "logps/chosen": -1.3721623420715332, "logps/rejected": -1.705155611038208, "loss": 2.6975, "nll_loss": 0.6224000453948975, "rewards/accuracies": 1.0, "rewards/chosen": -0.1372162401676178, "rewards/margins": 0.033299338072538376, "rewards/rejected": -0.17051556706428528, "step": 1006 }, { "epoch": 2.6552406064601186, "grad_norm": 10.129936218261719, "learning_rate": 9.228039041703638e-07, "log_odds_chosen": 0.7587910890579224, "log_odds_ratio": -0.3909429609775543, "logits/chosen": -1.1581931114196777, "logits/rejected": -1.0162979364395142, "logps/chosen": -1.335216760635376, "logps/rejected": -1.9500994682312012, "loss": 2.2318, "nll_loss": 0.5188637375831604, "rewards/accuracies": 1.0, "rewards/chosen": -0.1335216760635376, "rewards/margins": 0.06148828566074371, "rewards/rejected": -0.1950099766254425, "step": 1007 }, { "epoch": 2.6578773895847068, "grad_norm": 9.60527515411377, "learning_rate": 9.157054125998225e-07, "log_odds_chosen": 0.9209575057029724, "log_odds_ratio": -0.37785103917121887, "logits/chosen": -1.0892844200134277, "logits/rejected": -0.9736959934234619, "logps/chosen": -1.2888574600219727, "logps/rejected": -2.035107374191284, "loss": 2.0299, "nll_loss": 0.46970081329345703, "rewards/accuracies": 1.0, "rewards/chosen": -0.12888574600219727, "rewards/margins": 0.07462498545646667, "rewards/rejected": -0.20351073145866394, "step": 1008 }, { "epoch": 2.660514172709295, "grad_norm": 10.338894844055176, "learning_rate": 9.086069210292813e-07, "log_odds_chosen": 0.47613853216171265, "log_odds_ratio": -0.5086878538131714, "logits/chosen": -1.1516331434249878, "logits/rejected": -1.1064794063568115, "logps/chosen": -1.222299337387085, "logps/rejected": -1.5618138313293457, "loss": 2.4068, "nll_loss": 0.5508235692977905, "rewards/accuracies": 0.875, "rewards/chosen": -0.1222299337387085, "rewards/margins": 0.03395145758986473, "rewards/rejected": -0.15618139505386353, "step": 1009 }, { "epoch": 2.6631509558338826, "grad_norm": 10.516220092773438, "learning_rate": 9.0150842945874e-07, "log_odds_chosen": 0.6181827783584595, "log_odds_ratio": -0.4441305994987488, "logits/chosen": -1.1471972465515137, "logits/rejected": -1.0572733879089355, "logps/chosen": -1.2600420713424683, "logps/rejected": -1.744326114654541, "loss": 2.1607, "nll_loss": 0.4957590699195862, "rewards/accuracies": 1.0, "rewards/chosen": -0.12600421905517578, "rewards/margins": 0.04842839762568474, "rewards/rejected": -0.17443260550498962, "step": 1010 }, { "epoch": 2.6657877389584708, "grad_norm": 10.016770362854004, "learning_rate": 8.944099378881988e-07, "log_odds_chosen": 0.7220852971076965, "log_odds_ratio": -0.47244396805763245, "logits/chosen": -1.1754839420318604, "logits/rejected": -1.073702096939087, "logps/chosen": -1.3390041589736938, "logps/rejected": -1.923474907875061, "loss": 2.4484, "nll_loss": 0.564854621887207, "rewards/accuracies": 0.75, "rewards/chosen": -0.13390041887760162, "rewards/margins": 0.05844707787036896, "rewards/rejected": -0.19234749674797058, "step": 1011 }, { "epoch": 2.6684245220830585, "grad_norm": 10.699859619140625, "learning_rate": 8.873114463176574e-07, "log_odds_chosen": 0.6538971662521362, "log_odds_ratio": -0.43241989612579346, "logits/chosen": -1.1091864109039307, "logits/rejected": -0.9737260937690735, "logps/chosen": -1.2799919843673706, "logps/rejected": -1.7287094593048096, "loss": 2.0487, "nll_loss": 0.46893632411956787, "rewards/accuracies": 1.0, "rewards/chosen": -0.1279992163181305, "rewards/margins": 0.044871747493743896, "rewards/rejected": -0.1728709489107132, "step": 1012 }, { "epoch": 2.6710613052076466, "grad_norm": 10.358034133911133, "learning_rate": 8.802129547471162e-07, "log_odds_chosen": 0.6818956732749939, "log_odds_ratio": -0.4728802740573883, "logits/chosen": -1.1002342700958252, "logits/rejected": -1.0167999267578125, "logps/chosen": -1.2693593502044678, "logps/rejected": -1.8089416027069092, "loss": 2.0842, "nll_loss": 0.4737650156021118, "rewards/accuracies": 0.875, "rewards/chosen": -0.1269359439611435, "rewards/margins": 0.0539582222700119, "rewards/rejected": -0.1808941662311554, "step": 1013 }, { "epoch": 2.6736980883322348, "grad_norm": 8.607314109802246, "learning_rate": 8.731144631765749e-07, "log_odds_chosen": 0.4106805920600891, "log_odds_ratio": -0.5259032249450684, "logits/chosen": -0.9953519701957703, "logits/rejected": -0.96965092420578, "logps/chosen": -1.1974859237670898, "logps/rejected": -1.4902080297470093, "loss": 1.3613, "nll_loss": 0.28773102164268494, "rewards/accuracies": 0.875, "rewards/chosen": -0.11974858492612839, "rewards/margins": 0.029272207990288734, "rewards/rejected": -0.14902080595493317, "step": 1014 }, { "epoch": 2.676334871456823, "grad_norm": 10.295807838439941, "learning_rate": 8.660159716060337e-07, "log_odds_chosen": 0.6526855826377869, "log_odds_ratio": -0.43829602003097534, "logits/chosen": -1.1492483615875244, "logits/rejected": -1.050119400024414, "logps/chosen": -1.2075819969177246, "logps/rejected": -1.6898386478424072, "loss": 2.4668, "nll_loss": 0.5728762745857239, "rewards/accuracies": 0.875, "rewards/chosen": -0.12075819820165634, "rewards/margins": 0.04822567105293274, "rewards/rejected": -0.16898387670516968, "step": 1015 }, { "epoch": 2.6789716545814106, "grad_norm": 11.07802963256836, "learning_rate": 8.589174800354925e-07, "log_odds_chosen": 0.4253402054309845, "log_odds_ratio": -0.510114312171936, "logits/chosen": -1.127990484237671, "logits/rejected": -1.0399631261825562, "logps/chosen": -1.4407004117965698, "logps/rejected": -1.7871440649032593, "loss": 2.7528, "nll_loss": 0.6371949315071106, "rewards/accuracies": 1.0, "rewards/chosen": -0.14407005906105042, "rewards/margins": 0.03464437276124954, "rewards/rejected": -0.17871442437171936, "step": 1016 }, { "epoch": 2.6816084377059988, "grad_norm": 9.816594123840332, "learning_rate": 8.518189884649511e-07, "log_odds_chosen": 0.8740766048431396, "log_odds_ratio": -0.3651520609855652, "logits/chosen": -1.0894010066986084, "logits/rejected": -0.9762166738510132, "logps/chosen": -1.3023083209991455, "logps/rejected": -1.9985140562057495, "loss": 2.0904, "nll_loss": 0.4860801696777344, "rewards/accuracies": 1.0, "rewards/chosen": -0.1302308291196823, "rewards/margins": 0.06962057203054428, "rewards/rejected": -0.199851393699646, "step": 1017 }, { "epoch": 2.6842452208305865, "grad_norm": 10.55259895324707, "learning_rate": 8.447204968944099e-07, "log_odds_chosen": 0.4953387975692749, "log_odds_ratio": -0.4922969341278076, "logits/chosen": -1.174715280532837, "logits/rejected": -1.0578200817108154, "logps/chosen": -1.3714582920074463, "logps/rejected": -1.740402102470398, "loss": 2.6346, "nll_loss": 0.609413743019104, "rewards/accuracies": 1.0, "rewards/chosen": -0.13714581727981567, "rewards/margins": 0.03689439594745636, "rewards/rejected": -0.17404022812843323, "step": 1018 }, { "epoch": 2.6868820039551746, "grad_norm": 9.052789688110352, "learning_rate": 8.376220053238686e-07, "log_odds_chosen": 0.707571268081665, "log_odds_ratio": -0.4167298376560211, "logits/chosen": -1.0386282205581665, "logits/rejected": -0.9862587451934814, "logps/chosen": -1.158585548400879, "logps/rejected": -1.6877071857452393, "loss": 1.6206, "nll_loss": 0.36347225308418274, "rewards/accuracies": 1.0, "rewards/chosen": -0.11585855484008789, "rewards/margins": 0.0529121533036232, "rewards/rejected": -0.16877073049545288, "step": 1019 }, { "epoch": 2.6895187870797628, "grad_norm": 9.590802192687988, "learning_rate": 8.305235137533274e-07, "log_odds_chosen": 0.7320303320884705, "log_odds_ratio": -0.4414646327495575, "logits/chosen": -1.1201412677764893, "logits/rejected": -1.0050376653671265, "logps/chosen": -1.2693010568618774, "logps/rejected": -1.8650025129318237, "loss": 2.0464, "nll_loss": 0.4674500524997711, "rewards/accuracies": 1.0, "rewards/chosen": -0.1269301176071167, "rewards/margins": 0.059570129960775375, "rewards/rejected": -0.18650023639202118, "step": 1020 }, { "epoch": 2.692155570204351, "grad_norm": 10.539416313171387, "learning_rate": 8.234250221827861e-07, "log_odds_chosen": 0.4314787983894348, "log_odds_ratio": -0.5104358792304993, "logits/chosen": -1.178997278213501, "logits/rejected": -1.054152488708496, "logps/chosen": -1.3437623977661133, "logps/rejected": -1.662109613418579, "loss": 2.4121, "nll_loss": 0.5519872307777405, "rewards/accuracies": 1.0, "rewards/chosen": -0.13437624275684357, "rewards/margins": 0.031834714114665985, "rewards/rejected": -0.16621094942092896, "step": 1021 }, { "epoch": 2.6947923533289386, "grad_norm": 9.64401626586914, "learning_rate": 8.163265306122449e-07, "log_odds_chosen": 0.456570565700531, "log_odds_ratio": -0.5173032879829407, "logits/chosen": -1.123307704925537, "logits/rejected": -1.084729790687561, "logps/chosen": -1.2351043224334717, "logps/rejected": -1.591335654258728, "loss": 2.1734, "nll_loss": 0.4916233718395233, "rewards/accuracies": 1.0, "rewards/chosen": -0.1235104352235794, "rewards/margins": 0.03562314808368683, "rewards/rejected": -0.15913358330726624, "step": 1022 }, { "epoch": 2.6974291364535268, "grad_norm": 9.88548755645752, "learning_rate": 8.092280390417035e-07, "log_odds_chosen": 0.6224034428596497, "log_odds_ratio": -0.4495518207550049, "logits/chosen": -1.125747799873352, "logits/rejected": -1.08957839012146, "logps/chosen": -1.132243275642395, "logps/rejected": -1.5897014141082764, "loss": 1.9675, "nll_loss": 0.44693225622177124, "rewards/accuracies": 1.0, "rewards/chosen": -0.11322431266307831, "rewards/margins": 0.045745816081762314, "rewards/rejected": -0.15897014737129211, "step": 1023 }, { "epoch": 2.7000659195781145, "grad_norm": 11.096732139587402, "learning_rate": 8.021295474711623e-07, "log_odds_chosen": 0.5247015357017517, "log_odds_ratio": -0.4764745533466339, "logits/chosen": -1.1881556510925293, "logits/rejected": -1.057640552520752, "logps/chosen": -1.435180425643921, "logps/rejected": -1.863472580909729, "loss": 2.5414, "nll_loss": 0.5876976847648621, "rewards/accuracies": 1.0, "rewards/chosen": -0.14351804554462433, "rewards/margins": 0.04282921925187111, "rewards/rejected": -0.18634726107120514, "step": 1024 }, { "epoch": 2.7027027027027026, "grad_norm": 9.868570327758789, "learning_rate": 7.95031055900621e-07, "log_odds_chosen": 0.49480122327804565, "log_odds_ratio": -0.5005102157592773, "logits/chosen": -1.0674026012420654, "logits/rejected": -1.0222371816635132, "logps/chosen": -1.2881180047988892, "logps/rejected": -1.6646883487701416, "loss": 1.908, "nll_loss": 0.4269576668739319, "rewards/accuracies": 0.875, "rewards/chosen": -0.1288118064403534, "rewards/margins": 0.037657029926776886, "rewards/rejected": -0.16646882891654968, "step": 1025 }, { "epoch": 2.705339485827291, "grad_norm": 10.239690780639648, "learning_rate": 7.879325643300798e-07, "log_odds_chosen": 0.2985216975212097, "log_odds_ratio": -0.5645860433578491, "logits/chosen": -1.2120201587677002, "logits/rejected": -1.108433723449707, "logps/chosen": -1.2694709300994873, "logps/rejected": -1.4897054433822632, "loss": 2.6176, "nll_loss": 0.5979484915733337, "rewards/accuracies": 1.0, "rewards/chosen": -0.12694710493087769, "rewards/margins": 0.02202344685792923, "rewards/rejected": -0.14897054433822632, "step": 1026 }, { "epoch": 2.707976268951879, "grad_norm": 9.421809196472168, "learning_rate": 7.808340727595386e-07, "log_odds_chosen": 0.8708092570304871, "log_odds_ratio": -0.4166063666343689, "logits/chosen": -1.0700078010559082, "logits/rejected": -1.040330171585083, "logps/chosen": -1.1527518033981323, "logps/rejected": -1.845388650894165, "loss": 1.6631, "nll_loss": 0.37410521507263184, "rewards/accuracies": 0.875, "rewards/chosen": -0.11527517437934875, "rewards/margins": 0.06926368176937103, "rewards/rejected": -0.18453887104988098, "step": 1027 }, { "epoch": 2.7106130520764666, "grad_norm": 9.974482536315918, "learning_rate": 7.737355811889973e-07, "log_odds_chosen": 0.6049374341964722, "log_odds_ratio": -0.4576120376586914, "logits/chosen": -1.0829582214355469, "logits/rejected": -0.9886510968208313, "logps/chosen": -1.4116907119750977, "logps/rejected": -1.8953813314437866, "loss": 2.2451, "nll_loss": 0.5155256390571594, "rewards/accuracies": 0.875, "rewards/chosen": -0.14116908609867096, "rewards/margins": 0.04836905375123024, "rewards/rejected": -0.1895381361246109, "step": 1028 }, { "epoch": 2.713249835201055, "grad_norm": 10.552530288696289, "learning_rate": 7.666370896184561e-07, "log_odds_chosen": 0.3404359817504883, "log_odds_ratio": -0.5574318170547485, "logits/chosen": -1.1303635835647583, "logits/rejected": -1.0355476140975952, "logps/chosen": -1.3993536233901978, "logps/rejected": -1.6630032062530518, "loss": 2.4588, "nll_loss": 0.5589630007743835, "rewards/accuracies": 0.75, "rewards/chosen": -0.13993537425994873, "rewards/margins": 0.026364946737885475, "rewards/rejected": -0.16630031168460846, "step": 1029 }, { "epoch": 2.7158866183256425, "grad_norm": 9.24097728729248, "learning_rate": 7.595385980479148e-07, "log_odds_chosen": 0.5806278586387634, "log_odds_ratio": -0.47258681058883667, "logits/chosen": -1.053170919418335, "logits/rejected": -1.014019250869751, "logps/chosen": -1.3315719366073608, "logps/rejected": -1.7810237407684326, "loss": 1.7611, "nll_loss": 0.39301273226737976, "rewards/accuracies": 1.0, "rewards/chosen": -0.13315720856189728, "rewards/margins": 0.04494518041610718, "rewards/rejected": -0.17810237407684326, "step": 1030 }, { "epoch": 2.7185234014502306, "grad_norm": 9.250251770019531, "learning_rate": 7.524401064773736e-07, "log_odds_chosen": 0.7768089771270752, "log_odds_ratio": -0.4117186963558197, "logits/chosen": -1.1114296913146973, "logits/rejected": -1.0370917320251465, "logps/chosen": -1.152336835861206, "logps/rejected": -1.7033677101135254, "loss": 1.8086, "nll_loss": 0.4109684228897095, "rewards/accuracies": 0.875, "rewards/chosen": -0.11523368209600449, "rewards/margins": 0.05510308966040611, "rewards/rejected": -0.1703367829322815, "step": 1031 }, { "epoch": 2.721160184574819, "grad_norm": 10.387153625488281, "learning_rate": 7.453416149068323e-07, "log_odds_chosen": 0.5035200715065002, "log_odds_ratio": -0.4789535105228424, "logits/chosen": -1.1551223993301392, "logits/rejected": -1.0495972633361816, "logps/chosen": -1.2403175830841064, "logps/rejected": -1.6171977519989014, "loss": 2.4095, "nll_loss": 0.5544703006744385, "rewards/accuracies": 1.0, "rewards/chosen": -0.12403176724910736, "rewards/margins": 0.0376880019903183, "rewards/rejected": -0.16171976923942566, "step": 1032 }, { "epoch": 2.723796967699407, "grad_norm": 10.71376895904541, "learning_rate": 7.382431233362911e-07, "log_odds_chosen": 0.26095426082611084, "log_odds_ratio": -0.576055645942688, "logits/chosen": -1.1644666194915771, "logits/rejected": -1.076171875, "logps/chosen": -1.3639743328094482, "logps/rejected": -1.5646965503692627, "loss": 2.7671, "nll_loss": 0.6341590285301208, "rewards/accuracies": 0.875, "rewards/chosen": -0.13639745116233826, "rewards/margins": 0.0200722087174654, "rewards/rejected": -0.15646964311599731, "step": 1033 }, { "epoch": 2.7264337508239946, "grad_norm": 9.896520614624023, "learning_rate": 7.311446317657497e-07, "log_odds_chosen": 0.5601645708084106, "log_odds_ratio": -0.4592554271221161, "logits/chosen": -1.172353982925415, "logits/rejected": -1.0247211456298828, "logps/chosen": -1.2512595653533936, "logps/rejected": -1.6812150478363037, "loss": 2.3149, "nll_loss": 0.5327982902526855, "rewards/accuracies": 1.0, "rewards/chosen": -0.1251259595155716, "rewards/margins": 0.042995553463697433, "rewards/rejected": -0.16812151670455933, "step": 1034 }, { "epoch": 2.729070533948583, "grad_norm": 10.839550971984863, "learning_rate": 7.240461401952085e-07, "log_odds_chosen": 0.7401673793792725, "log_odds_ratio": -0.4034544825553894, "logits/chosen": -1.150311827659607, "logits/rejected": -1.0346262454986572, "logps/chosen": -1.332601547241211, "logps/rejected": -1.9081275463104248, "loss": 2.5539, "nll_loss": 0.5981303453445435, "rewards/accuracies": 1.0, "rewards/chosen": -0.13326016068458557, "rewards/margins": 0.05755259841680527, "rewards/rejected": -0.19081275165081024, "step": 1035 }, { "epoch": 2.7317073170731705, "grad_norm": 10.373785972595215, "learning_rate": 7.169476486246672e-07, "log_odds_chosen": 0.4543488621711731, "log_odds_ratio": -0.5315455794334412, "logits/chosen": -1.2191540002822876, "logits/rejected": -1.04941987991333, "logps/chosen": -1.4185079336166382, "logps/rejected": -1.7377822399139404, "loss": 2.9067, "nll_loss": 0.6735115051269531, "rewards/accuracies": 0.75, "rewards/chosen": -0.1418507844209671, "rewards/margins": 0.031927429139614105, "rewards/rejected": -0.1737782210111618, "step": 1036 }, { "epoch": 2.7343441001977586, "grad_norm": 10.395633697509766, "learning_rate": 7.098491570541259e-07, "log_odds_chosen": 0.471661239862442, "log_odds_ratio": -0.4966796636581421, "logits/chosen": -1.2016971111297607, "logits/rejected": -1.094733476638794, "logps/chosen": -1.3901854753494263, "logps/rejected": -1.76613187789917, "loss": 2.6959, "nll_loss": 0.6243170499801636, "rewards/accuracies": 0.875, "rewards/chosen": -0.13901855051517487, "rewards/margins": 0.03759463131427765, "rewards/rejected": -0.17661318182945251, "step": 1037 }, { "epoch": 2.736980883322347, "grad_norm": 10.377779006958008, "learning_rate": 7.027506654835847e-07, "log_odds_chosen": 0.4487267732620239, "log_odds_ratio": -0.5085635185241699, "logits/chosen": -1.2070711851119995, "logits/rejected": -1.0505385398864746, "logps/chosen": -1.2501126527786255, "logps/rejected": -1.6021356582641602, "loss": 2.5371, "nll_loss": 0.5834246277809143, "rewards/accuracies": 0.875, "rewards/chosen": -0.12501126527786255, "rewards/margins": 0.03520228713750839, "rewards/rejected": -0.16021355986595154, "step": 1038 }, { "epoch": 2.739617666446935, "grad_norm": 10.630236625671387, "learning_rate": 6.956521739130434e-07, "log_odds_chosen": 0.4647771716117859, "log_odds_ratio": -0.4932706356048584, "logits/chosen": -1.10763418674469, "logits/rejected": -0.9972668886184692, "logps/chosen": -1.3534834384918213, "logps/rejected": -1.7255789041519165, "loss": 2.231, "nll_loss": 0.5084264874458313, "rewards/accuracies": 1.0, "rewards/chosen": -0.1353483498096466, "rewards/margins": 0.03720953315496445, "rewards/rejected": -0.17255789041519165, "step": 1039 }, { "epoch": 2.7422544495715226, "grad_norm": 10.011463165283203, "learning_rate": 6.885536823425022e-07, "log_odds_chosen": 0.5323688387870789, "log_odds_ratio": -0.48797640204429626, "logits/chosen": -1.1349525451660156, "logits/rejected": -1.0394479036331177, "logps/chosen": -1.2457324266433716, "logps/rejected": -1.574986457824707, "loss": 2.1416, "nll_loss": 0.4866067171096802, "rewards/accuracies": 1.0, "rewards/chosen": -0.1245732456445694, "rewards/margins": 0.03292540833353996, "rewards/rejected": -0.15749865770339966, "step": 1040 }, { "epoch": 2.744891232696111, "grad_norm": 10.38745403289795, "learning_rate": 6.814551907719609e-07, "log_odds_chosen": 0.7381571531295776, "log_odds_ratio": -0.424299955368042, "logits/chosen": -1.064079999923706, "logits/rejected": -0.9530540704727173, "logps/chosen": -1.4342451095581055, "logps/rejected": -2.0505714416503906, "loss": 2.7753, "nll_loss": 0.6513826847076416, "rewards/accuracies": 0.875, "rewards/chosen": -0.14342449605464935, "rewards/margins": 0.06163264811038971, "rewards/rejected": -0.20505714416503906, "step": 1041 }, { "epoch": 2.7475280158206985, "grad_norm": 10.014318466186523, "learning_rate": 6.743566992014197e-07, "log_odds_chosen": 0.41500961780548096, "log_odds_ratio": -0.5223526358604431, "logits/chosen": -1.108499526977539, "logits/rejected": -1.050550103187561, "logps/chosen": -1.314407229423523, "logps/rejected": -1.6437593698501587, "loss": 2.0834, "nll_loss": 0.46862050890922546, "rewards/accuracies": 0.875, "rewards/chosen": -0.13144072890281677, "rewards/margins": 0.032935213297605515, "rewards/rejected": -0.16437594592571259, "step": 1042 }, { "epoch": 2.7501647989452866, "grad_norm": 9.400856018066406, "learning_rate": 6.672582076308784e-07, "log_odds_chosen": 0.7966787219047546, "log_odds_ratio": -0.4011804759502411, "logits/chosen": -1.0503668785095215, "logits/rejected": -0.9597200751304626, "logps/chosen": -1.1999990940093994, "logps/rejected": -1.771064043045044, "loss": 1.7316, "nll_loss": 0.3927770256996155, "rewards/accuracies": 1.0, "rewards/chosen": -0.11999991536140442, "rewards/margins": 0.05710649862885475, "rewards/rejected": -0.17710641026496887, "step": 1043 }, { "epoch": 2.752801582069875, "grad_norm": 10.262003898620605, "learning_rate": 6.601597160603372e-07, "log_odds_chosen": 0.31851726770401, "log_odds_ratio": -0.5514330863952637, "logits/chosen": -1.088751196861267, "logits/rejected": -1.0029819011688232, "logps/chosen": -1.366959810256958, "logps/rejected": -1.6166133880615234, "loss": 2.2747, "nll_loss": 0.513523519039154, "rewards/accuracies": 1.0, "rewards/chosen": -0.1366959810256958, "rewards/margins": 0.02496536634862423, "rewards/rejected": -0.16166135668754578, "step": 1044 }, { "epoch": 2.755438365194463, "grad_norm": 9.898816108703613, "learning_rate": 6.530612244897958e-07, "log_odds_chosen": 0.5408321022987366, "log_odds_ratio": -0.47568681836128235, "logits/chosen": -1.1615869998931885, "logits/rejected": -1.0784975290298462, "logps/chosen": -1.4018906354904175, "logps/rejected": -1.839592695236206, "loss": 2.6957, "nll_loss": 0.6263567209243774, "rewards/accuracies": 0.875, "rewards/chosen": -0.140189066529274, "rewards/margins": 0.0437702052295208, "rewards/rejected": -0.18395927548408508, "step": 1045 }, { "epoch": 2.7580751483190507, "grad_norm": 10.726858139038086, "learning_rate": 6.459627329192546e-07, "log_odds_chosen": 0.4407050609588623, "log_odds_ratio": -0.54912930727005, "logits/chosen": -1.1333410739898682, "logits/rejected": -1.0956073999404907, "logps/chosen": -1.3223137855529785, "logps/rejected": -1.6678136587142944, "loss": 2.519, "nll_loss": 0.5748312473297119, "rewards/accuracies": 0.75, "rewards/chosen": -0.13223138451576233, "rewards/margins": 0.03454999625682831, "rewards/rejected": -0.16678138077259064, "step": 1046 }, { "epoch": 2.760711931443639, "grad_norm": 9.329651832580566, "learning_rate": 6.388642413487134e-07, "log_odds_chosen": 0.33870670199394226, "log_odds_ratio": -0.547798752784729, "logits/chosen": -1.0690044164657593, "logits/rejected": -1.0301674604415894, "logps/chosen": -1.1900187730789185, "logps/rejected": -1.4351589679718018, "loss": 1.7699, "nll_loss": 0.3877028226852417, "rewards/accuracies": 0.875, "rewards/chosen": -0.11900188028812408, "rewards/margins": 0.02451402135193348, "rewards/rejected": -0.14351589977741241, "step": 1047 }, { "epoch": 2.7633487145682265, "grad_norm": 10.1386137008667, "learning_rate": 6.317657497781721e-07, "log_odds_chosen": 0.764558732509613, "log_odds_ratio": -0.404604971408844, "logits/chosen": -1.1327184438705444, "logits/rejected": -1.0394474267959595, "logps/chosen": -1.3747018575668335, "logps/rejected": -1.9805411100387573, "loss": 2.3524, "nll_loss": 0.5476467609405518, "rewards/accuracies": 0.875, "rewards/chosen": -0.13747017085552216, "rewards/margins": 0.0605839341878891, "rewards/rejected": -0.19805411994457245, "step": 1048 }, { "epoch": 2.7659854976928147, "grad_norm": 9.465290069580078, "learning_rate": 6.246672582076309e-07, "log_odds_chosen": 0.5590122938156128, "log_odds_ratio": -0.4640856683254242, "logits/chosen": -1.127828598022461, "logits/rejected": -1.0180671215057373, "logps/chosen": -1.2410472631454468, "logps/rejected": -1.6706857681274414, "loss": 2.1327, "nll_loss": 0.48676055669784546, "rewards/accuracies": 1.0, "rewards/chosen": -0.12410472333431244, "rewards/margins": 0.04296386241912842, "rewards/rejected": -0.16706860065460205, "step": 1049 }, { "epoch": 2.768622280817403, "grad_norm": 10.116673469543457, "learning_rate": 6.175687666370896e-07, "log_odds_chosen": 0.6692229509353638, "log_odds_ratio": -0.41914093494415283, "logits/chosen": -1.077138066291809, "logits/rejected": -0.9766216278076172, "logps/chosen": -1.297964096069336, "logps/rejected": -1.8100271224975586, "loss": 1.9948, "nll_loss": 0.45677730441093445, "rewards/accuracies": 1.0, "rewards/chosen": -0.12979641556739807, "rewards/margins": 0.05120629817247391, "rewards/rejected": -0.18100272119045258, "step": 1050 }, { "epoch": 2.771259063941991, "grad_norm": 10.480387687683105, "learning_rate": 6.104702750665484e-07, "log_odds_chosen": 0.3245909810066223, "log_odds_ratio": -0.5469909310340881, "logits/chosen": -1.1878106594085693, "logits/rejected": -1.1436951160430908, "logps/chosen": -1.385441780090332, "logps/rejected": -1.6416648626327515, "loss": 2.7696, "nll_loss": 0.6376917958259583, "rewards/accuracies": 1.0, "rewards/chosen": -0.13854418694972992, "rewards/margins": 0.025622311979532242, "rewards/rejected": -0.16416651010513306, "step": 1051 }, { "epoch": 2.7738958470665787, "grad_norm": 10.034676551818848, "learning_rate": 6.033717834960071e-07, "log_odds_chosen": 0.4665338397026062, "log_odds_ratio": -0.5162329077720642, "logits/chosen": -1.1020002365112305, "logits/rejected": -1.0287086963653564, "logps/chosen": -1.3640254735946655, "logps/rejected": -1.7066876888275146, "loss": 2.4071, "nll_loss": 0.5501580834388733, "rewards/accuracies": 0.875, "rewards/chosen": -0.13640254735946655, "rewards/margins": 0.03426623344421387, "rewards/rejected": -0.17066878080368042, "step": 1052 }, { "epoch": 2.776532630191167, "grad_norm": 9.526391983032227, "learning_rate": 5.962732919254659e-07, "log_odds_chosen": 0.6806021332740784, "log_odds_ratio": -0.48156851530075073, "logits/chosen": -1.1003026962280273, "logits/rejected": -1.0384246110916138, "logps/chosen": -1.1482423543930054, "logps/rejected": -1.5512956380844116, "loss": 1.8907, "nll_loss": 0.42451637983322144, "rewards/accuracies": 0.75, "rewards/chosen": -0.11482425034046173, "rewards/margins": 0.04030532017350197, "rewards/rejected": -0.1551295667886734, "step": 1053 }, { "epoch": 2.7791694133157545, "grad_norm": 9.275099754333496, "learning_rate": 5.891748003549245e-07, "log_odds_chosen": 0.8472751975059509, "log_odds_ratio": -0.37624391913414, "logits/chosen": -1.054128646850586, "logits/rejected": -0.9725082516670227, "logps/chosen": -1.2834038734436035, "logps/rejected": -1.9531402587890625, "loss": 1.7921, "nll_loss": 0.41038990020751953, "rewards/accuracies": 1.0, "rewards/chosen": -0.12834039330482483, "rewards/margins": 0.06697362661361694, "rewards/rejected": -0.19531401991844177, "step": 1054 }, { "epoch": 2.7818061964403427, "grad_norm": 10.470935821533203, "learning_rate": 5.820763087843832e-07, "log_odds_chosen": 0.8075612783432007, "log_odds_ratio": -0.3843352198600769, "logits/chosen": -1.1219369173049927, "logits/rejected": -1.0306581258773804, "logps/chosen": -1.3321828842163086, "logps/rejected": -1.9868505001068115, "loss": 2.211, "nll_loss": 0.5143091082572937, "rewards/accuracies": 1.0, "rewards/chosen": -0.13321830332279205, "rewards/margins": 0.06546676158905029, "rewards/rejected": -0.19868505001068115, "step": 1055 }, { "epoch": 2.784442979564931, "grad_norm": 9.963431358337402, "learning_rate": 5.74977817213842e-07, "log_odds_chosen": 0.533049464225769, "log_odds_ratio": -0.48167720437049866, "logits/chosen": -1.0858891010284424, "logits/rejected": -1.0543181896209717, "logps/chosen": -1.3295893669128418, "logps/rejected": -1.7450724840164185, "loss": 2.1299, "nll_loss": 0.4843091368675232, "rewards/accuracies": 1.0, "rewards/chosen": -0.13295894861221313, "rewards/margins": 0.04154830053448677, "rewards/rejected": -0.1745072603225708, "step": 1056 }, { "epoch": 2.787079762689519, "grad_norm": 9.712507247924805, "learning_rate": 5.678793256433007e-07, "log_odds_chosen": 0.5920177698135376, "log_odds_ratio": -0.4669216275215149, "logits/chosen": -1.1910808086395264, "logits/rejected": -1.0788345336914062, "logps/chosen": -1.1530779600143433, "logps/rejected": -1.581312894821167, "loss": 2.478, "nll_loss": 0.5728002786636353, "rewards/accuracies": 1.0, "rewards/chosen": -0.11530779302120209, "rewards/margins": 0.04282350093126297, "rewards/rejected": -0.15813130140304565, "step": 1057 }, { "epoch": 2.7897165458141067, "grad_norm": 9.908315658569336, "learning_rate": 5.607808340727595e-07, "log_odds_chosen": 0.884239673614502, "log_odds_ratio": -0.3546842038631439, "logits/chosen": -1.1526079177856445, "logits/rejected": -1.0145677328109741, "logps/chosen": -1.288661003112793, "logps/rejected": -1.9862499237060547, "loss": 2.0295, "nll_loss": 0.47191494703292847, "rewards/accuracies": 1.0, "rewards/chosen": -0.12886610627174377, "rewards/margins": 0.06975888460874557, "rewards/rejected": -0.19862499833106995, "step": 1058 }, { "epoch": 2.792353328938695, "grad_norm": 9.855311393737793, "learning_rate": 5.536823425022182e-07, "log_odds_chosen": 0.7123335599899292, "log_odds_ratio": -0.4272412061691284, "logits/chosen": -1.1070234775543213, "logits/rejected": -1.0540480613708496, "logps/chosen": -1.2021454572677612, "logps/rejected": -1.7236651182174683, "loss": 2.0172, "nll_loss": 0.46157926321029663, "rewards/accuracies": 0.875, "rewards/chosen": -0.1202145516872406, "rewards/margins": 0.052151963114738464, "rewards/rejected": -0.17236651480197906, "step": 1059 }, { "epoch": 2.794990112063283, "grad_norm": 10.23788070678711, "learning_rate": 5.46583850931677e-07, "log_odds_chosen": 0.5830734372138977, "log_odds_ratio": -0.45574843883514404, "logits/chosen": -1.072530746459961, "logits/rejected": -0.9956455826759338, "logps/chosen": -1.3934428691864014, "logps/rejected": -1.8281140327453613, "loss": 2.2617, "nll_loss": 0.5198531150817871, "rewards/accuracies": 1.0, "rewards/chosen": -0.13934428989887238, "rewards/margins": 0.043467119336128235, "rewards/rejected": -0.1828114092350006, "step": 1060 }, { "epoch": 2.7976268951878707, "grad_norm": 9.85499382019043, "learning_rate": 5.394853593611357e-07, "log_odds_chosen": 0.8616229295730591, "log_odds_ratio": -0.3569209575653076, "logits/chosen": -1.1381977796554565, "logits/rejected": -1.0240377187728882, "logps/chosen": -1.2420525550842285, "logps/rejected": -1.9140177965164185, "loss": 2.1363, "nll_loss": 0.49839523434638977, "rewards/accuracies": 1.0, "rewards/chosen": -0.12420526146888733, "rewards/margins": 0.06719652563333511, "rewards/rejected": -0.19140177965164185, "step": 1061 }, { "epoch": 2.800263678312459, "grad_norm": 10.282552719116211, "learning_rate": 5.323868677905945e-07, "log_odds_chosen": 0.3330332934856415, "log_odds_ratio": -0.5529873371124268, "logits/chosen": -1.1650663614273071, "logits/rejected": -1.070693016052246, "logps/chosen": -1.3323811292648315, "logps/rejected": -1.5864946842193604, "loss": 2.4555, "nll_loss": 0.5585712790489197, "rewards/accuracies": 0.875, "rewards/chosen": -0.13323810696601868, "rewards/margins": 0.025411371141672134, "rewards/rejected": -0.1586494743824005, "step": 1062 }, { "epoch": 2.802900461437047, "grad_norm": 10.069133758544922, "learning_rate": 5.252883762200532e-07, "log_odds_chosen": 0.7132698893547058, "log_odds_ratio": -0.4185967445373535, "logits/chosen": -1.0890593528747559, "logits/rejected": -1.0079560279846191, "logps/chosen": -1.2254939079284668, "logps/rejected": -1.7366520166397095, "loss": 2.1125, "nll_loss": 0.48625391721725464, "rewards/accuracies": 1.0, "rewards/chosen": -0.1225493997335434, "rewards/margins": 0.05111580714583397, "rewards/rejected": -0.17366519570350647, "step": 1063 }, { "epoch": 2.8055372445616347, "grad_norm": 10.150778770446777, "learning_rate": 5.18189884649512e-07, "log_odds_chosen": 0.5579012632369995, "log_odds_ratio": -0.4871813654899597, "logits/chosen": -1.117282509803772, "logits/rejected": -1.0329712629318237, "logps/chosen": -1.3524284362792969, "logps/rejected": -1.8107136487960815, "loss": 2.4066, "nll_loss": 0.5529344081878662, "rewards/accuracies": 0.875, "rewards/chosen": -0.13524284958839417, "rewards/margins": 0.04582851752638817, "rewards/rejected": -0.18107135593891144, "step": 1064 }, { "epoch": 2.808174027686223, "grad_norm": 9.787497520446777, "learning_rate": 5.110913930789707e-07, "log_odds_chosen": 0.619898796081543, "log_odds_ratio": -0.4415004551410675, "logits/chosen": -1.0507391691207886, "logits/rejected": -0.9704097509384155, "logps/chosen": -1.3528366088867188, "logps/rejected": -1.8461878299713135, "loss": 2.2188, "nll_loss": 0.5105401277542114, "rewards/accuracies": 1.0, "rewards/chosen": -0.13528364896774292, "rewards/margins": 0.04933511093258858, "rewards/rejected": -0.1846187710762024, "step": 1065 }, { "epoch": 2.810810810810811, "grad_norm": 10.181516647338867, "learning_rate": 5.039929015084295e-07, "log_odds_chosen": 0.4866787791252136, "log_odds_ratio": -0.5008502006530762, "logits/chosen": -1.135515570640564, "logits/rejected": -1.0233913660049438, "logps/chosen": -1.3594303131103516, "logps/rejected": -1.7316761016845703, "loss": 2.1966, "nll_loss": 0.49907565116882324, "rewards/accuracies": 0.875, "rewards/chosen": -0.13594304025173187, "rewards/margins": 0.037224579602479935, "rewards/rejected": -0.1731676161289215, "step": 1066 }, { "epoch": 2.8134475939353987, "grad_norm": 10.298295974731445, "learning_rate": 4.968944099378881e-07, "log_odds_chosen": 0.40575116872787476, "log_odds_ratio": -0.5261607766151428, "logits/chosen": -1.1635947227478027, "logits/rejected": -1.082903265953064, "logps/chosen": -1.376260757446289, "logps/rejected": -1.6973493099212646, "loss": 2.7818, "nll_loss": 0.6428461670875549, "rewards/accuracies": 0.875, "rewards/chosen": -0.13762608170509338, "rewards/margins": 0.0321088507771492, "rewards/rejected": -0.169734925031662, "step": 1067 }, { "epoch": 2.816084377059987, "grad_norm": 9.991827964782715, "learning_rate": 4.897959183673469e-07, "log_odds_chosen": 0.5437269806861877, "log_odds_ratio": -0.4750468134880066, "logits/chosen": -1.203829288482666, "logits/rejected": -1.0508911609649658, "logps/chosen": -1.3043091297149658, "logps/rejected": -1.7314250469207764, "loss": 2.4969, "nll_loss": 0.5767324566841125, "rewards/accuracies": 0.875, "rewards/chosen": -0.1304309070110321, "rewards/margins": 0.04271160066127777, "rewards/rejected": -0.17314249277114868, "step": 1068 }, { "epoch": 2.818721160184575, "grad_norm": 9.926018714904785, "learning_rate": 4.826974267968056e-07, "log_odds_chosen": 0.6212695837020874, "log_odds_ratio": -0.4382473826408386, "logits/chosen": -1.1351120471954346, "logits/rejected": -1.0052011013031006, "logps/chosen": -1.2674087285995483, "logps/rejected": -1.7482867240905762, "loss": 2.0097, "nll_loss": 0.4585903286933899, "rewards/accuracies": 1.0, "rewards/chosen": -0.12674087285995483, "rewards/margins": 0.04808780550956726, "rewards/rejected": -0.1748286783695221, "step": 1069 }, { "epoch": 2.8213579433091627, "grad_norm": 9.707475662231445, "learning_rate": 4.7559893522626436e-07, "log_odds_chosen": 0.8172865509986877, "log_odds_ratio": -0.39224159717559814, "logits/chosen": -1.1351253986358643, "logits/rejected": -0.9938197135925293, "logps/chosen": -1.233187198638916, "logps/rejected": -1.8230117559432983, "loss": 2.2785, "nll_loss": 0.5304071307182312, "rewards/accuracies": 1.0, "rewards/chosen": -0.12331871688365936, "rewards/margins": 0.05898246169090271, "rewards/rejected": -0.18230119347572327, "step": 1070 }, { "epoch": 2.823994726433751, "grad_norm": 9.760191917419434, "learning_rate": 4.685004436557231e-07, "log_odds_chosen": 0.48987436294555664, "log_odds_ratio": -0.5002853274345398, "logits/chosen": -1.0603246688842773, "logits/rejected": -1.0183500051498413, "logps/chosen": -1.2384836673736572, "logps/rejected": -1.5949968099594116, "loss": 2.3909, "nll_loss": 0.5476902723312378, "rewards/accuracies": 1.0, "rewards/chosen": -0.12384836375713348, "rewards/margins": 0.03565133363008499, "rewards/rejected": -0.15949970483779907, "step": 1071 }, { "epoch": 2.826631509558339, "grad_norm": 10.26170539855957, "learning_rate": 4.614019520851819e-07, "log_odds_chosen": 0.6463963985443115, "log_odds_ratio": -0.4377222955226898, "logits/chosen": -1.1595723628997803, "logits/rejected": -1.0640404224395752, "logps/chosen": -1.2628695964813232, "logps/rejected": -1.7612965106964111, "loss": 2.3593, "nll_loss": 0.5460578799247742, "rewards/accuracies": 1.0, "rewards/chosen": -0.12628696858882904, "rewards/margins": 0.04984269291162491, "rewards/rejected": -0.17612966895103455, "step": 1072 }, { "epoch": 2.8292682926829267, "grad_norm": 9.662675857543945, "learning_rate": 4.5430346051464064e-07, "log_odds_chosen": 0.44240912795066833, "log_odds_ratio": -0.5153103470802307, "logits/chosen": -1.1566162109375, "logits/rejected": -1.0911320447921753, "logps/chosen": -1.233819842338562, "logps/rejected": -1.562687873840332, "loss": 2.2785, "nll_loss": 0.51810622215271, "rewards/accuracies": 0.875, "rewards/chosen": -0.12338199466466904, "rewards/margins": 0.0328868106007576, "rewards/rejected": -0.15626880526542664, "step": 1073 }, { "epoch": 2.831905075807515, "grad_norm": 10.193406105041504, "learning_rate": 4.472049689440994e-07, "log_odds_chosen": 0.8326815962791443, "log_odds_ratio": -0.38279303908348083, "logits/chosen": -1.1431697607040405, "logits/rejected": -1.0575648546218872, "logps/chosen": -1.3324904441833496, "logps/rejected": -2.0087924003601074, "loss": 2.2275, "nll_loss": 0.5186055898666382, "rewards/accuracies": 1.0, "rewards/chosen": -0.13324904441833496, "rewards/margins": 0.06763018667697906, "rewards/rejected": -0.20087924599647522, "step": 1074 }, { "epoch": 2.834541858932103, "grad_norm": 10.405094146728516, "learning_rate": 4.401064773735581e-07, "log_odds_chosen": 0.7254148721694946, "log_odds_ratio": -0.4014751613140106, "logits/chosen": -1.1544923782348633, "logits/rejected": -0.9924436211585999, "logps/chosen": -1.3107327222824097, "logps/rejected": -1.877253770828247, "loss": 2.3186, "nll_loss": 0.5394942760467529, "rewards/accuracies": 1.0, "rewards/chosen": -0.13107328116893768, "rewards/margins": 0.05665209889411926, "rewards/rejected": -0.18772536516189575, "step": 1075 }, { "epoch": 2.8371786420566907, "grad_norm": 9.991864204406738, "learning_rate": 4.3300798580301686e-07, "log_odds_chosen": 0.766412615776062, "log_odds_ratio": -0.40515220165252686, "logits/chosen": -1.0913034677505493, "logits/rejected": -1.0271958112716675, "logps/chosen": -1.3255186080932617, "logps/rejected": -1.9466503858566284, "loss": 2.022, "nll_loss": 0.4649835228919983, "rewards/accuracies": 1.0, "rewards/chosen": -0.1325518637895584, "rewards/margins": 0.06211317703127861, "rewards/rejected": -0.19466502964496613, "step": 1076 }, { "epoch": 2.839815425181279, "grad_norm": 10.100076675415039, "learning_rate": 4.2590949423247555e-07, "log_odds_chosen": 0.6009469032287598, "log_odds_ratio": -0.45665615797042847, "logits/chosen": -1.1655510663986206, "logits/rejected": -1.0256352424621582, "logps/chosen": -1.3076218366622925, "logps/rejected": -1.7924292087554932, "loss": 2.5451, "nll_loss": 0.5906082987785339, "rewards/accuracies": 1.0, "rewards/chosen": -0.13076218962669373, "rewards/margins": 0.04848073422908783, "rewards/rejected": -0.17924290895462036, "step": 1077 }, { "epoch": 2.842452208305867, "grad_norm": 9.875173568725586, "learning_rate": 4.188110026619343e-07, "log_odds_chosen": 0.7876337766647339, "log_odds_ratio": -0.42633935809135437, "logits/chosen": -1.1450986862182617, "logits/rejected": -1.0313525199890137, "logps/chosen": -1.335124135017395, "logps/rejected": -2.00093936920166, "loss": 2.1638, "nll_loss": 0.49831515550613403, "rewards/accuracies": 0.875, "rewards/chosen": -0.13351240754127502, "rewards/margins": 0.06658150255680084, "rewards/rejected": -0.20009392499923706, "step": 1078 }, { "epoch": 2.8450889914304547, "grad_norm": 9.214183807373047, "learning_rate": 4.1171251109139303e-07, "log_odds_chosen": 0.5808621644973755, "log_odds_ratio": -0.49951937794685364, "logits/chosen": -1.079819679260254, "logits/rejected": -1.0531589984893799, "logps/chosen": -1.077141523361206, "logps/rejected": -1.4757481813430786, "loss": 1.7651, "nll_loss": 0.39131635427474976, "rewards/accuracies": 0.875, "rewards/chosen": -0.10771416127681732, "rewards/margins": 0.039860669523477554, "rewards/rejected": -0.14757482707500458, "step": 1079 }, { "epoch": 2.847725774555043, "grad_norm": 10.297467231750488, "learning_rate": 4.0461401952085177e-07, "log_odds_chosen": 0.4811196029186249, "log_odds_ratio": -0.4953339695930481, "logits/chosen": -1.0673904418945312, "logits/rejected": -0.9793952703475952, "logps/chosen": -1.2865219116210938, "logps/rejected": -1.6545686721801758, "loss": 2.3899, "nll_loss": 0.5479344725608826, "rewards/accuracies": 1.0, "rewards/chosen": -0.1286522001028061, "rewards/margins": 0.03680466115474701, "rewards/rejected": -0.1654568612575531, "step": 1080 }, { "epoch": 2.850362557679631, "grad_norm": 9.687300682067871, "learning_rate": 3.975155279503105e-07, "log_odds_chosen": 0.7552241086959839, "log_odds_ratio": -0.40107399225234985, "logits/chosen": -1.1260749101638794, "logits/rejected": -1.0342764854431152, "logps/chosen": -1.2746638059616089, "logps/rejected": -1.8723293542861938, "loss": 2.1534, "nll_loss": 0.4982360005378723, "rewards/accuracies": 1.0, "rewards/chosen": -0.1274663805961609, "rewards/margins": 0.05976655334234238, "rewards/rejected": -0.18723294138908386, "step": 1081 }, { "epoch": 2.852999340804219, "grad_norm": 10.732072830200195, "learning_rate": 3.904170363797693e-07, "log_odds_chosen": 0.6202609539031982, "log_odds_ratio": -0.45681479573249817, "logits/chosen": -1.0848345756530762, "logits/rejected": -1.0748814344406128, "logps/chosen": -1.1798266172409058, "logps/rejected": -1.6549553871154785, "loss": 1.8035, "nll_loss": 0.4052017629146576, "rewards/accuracies": 1.0, "rewards/chosen": -0.1179826632142067, "rewards/margins": 0.047512881457805634, "rewards/rejected": -0.16549554467201233, "step": 1082 }, { "epoch": 2.855636123928807, "grad_norm": 10.704569816589355, "learning_rate": 3.8331854480922805e-07, "log_odds_chosen": 0.4765079617500305, "log_odds_ratio": -0.49119317531585693, "logits/chosen": -1.2375128269195557, "logits/rejected": -1.0925644636154175, "logps/chosen": -1.4868674278259277, "logps/rejected": -1.8717963695526123, "loss": 2.8802, "nll_loss": 0.6709375977516174, "rewards/accuracies": 1.0, "rewards/chosen": -0.1486867368221283, "rewards/margins": 0.03849291056394577, "rewards/rejected": -0.18717963993549347, "step": 1083 }, { "epoch": 2.858272907053395, "grad_norm": 10.494728088378906, "learning_rate": 3.762200532386868e-07, "log_odds_chosen": 0.6826547384262085, "log_odds_ratio": -0.4175352454185486, "logits/chosen": -1.1123088598251343, "logits/rejected": -1.03884756565094, "logps/chosen": -1.3633010387420654, "logps/rejected": -1.9031158685684204, "loss": 2.2859, "nll_loss": 0.5297098159790039, "rewards/accuracies": 1.0, "rewards/chosen": -0.13633009791374207, "rewards/margins": 0.053981490433216095, "rewards/rejected": -0.19031159579753876, "step": 1084 }, { "epoch": 2.8609096901779827, "grad_norm": 10.636738777160645, "learning_rate": 3.6912156166814553e-07, "log_odds_chosen": 0.46351560950279236, "log_odds_ratio": -0.49620485305786133, "logits/chosen": -1.127618432044983, "logits/rejected": -1.0073745250701904, "logps/chosen": -1.481499433517456, "logps/rejected": -1.860403299331665, "loss": 2.6967, "nll_loss": 0.6245455741882324, "rewards/accuracies": 1.0, "rewards/chosen": -0.14814993739128113, "rewards/margins": 0.037890393286943436, "rewards/rejected": -0.18604034185409546, "step": 1085 }, { "epoch": 2.863546473302571, "grad_norm": 10.276351928710938, "learning_rate": 3.6202307009760427e-07, "log_odds_chosen": 0.5640963315963745, "log_odds_ratio": -0.4584580361843109, "logits/chosen": -1.1923645734786987, "logits/rejected": -1.0474330186843872, "logps/chosen": -1.3473620414733887, "logps/rejected": -1.795386791229248, "loss": 2.3472, "nll_loss": 0.540945291519165, "rewards/accuracies": 1.0, "rewards/chosen": -0.13473621010780334, "rewards/margins": 0.0448024608194828, "rewards/rejected": -0.17953866720199585, "step": 1086 }, { "epoch": 2.866183256427159, "grad_norm": 9.075798034667969, "learning_rate": 3.5492457852706296e-07, "log_odds_chosen": 0.740669846534729, "log_odds_ratio": -0.40852227807044983, "logits/chosen": -1.0822901725769043, "logits/rejected": -1.02024245262146, "logps/chosen": -1.1343278884887695, "logps/rejected": -1.6959733963012695, "loss": 1.8156, "nll_loss": 0.4130437970161438, "rewards/accuracies": 1.0, "rewards/chosen": -0.11343279480934143, "rewards/margins": 0.05616454780101776, "rewards/rejected": -0.1695973426103592, "step": 1087 }, { "epoch": 2.868820039551747, "grad_norm": 10.139521598815918, "learning_rate": 3.478260869565217e-07, "log_odds_chosen": 0.6256661415100098, "log_odds_ratio": -0.4504834711551666, "logits/chosen": -1.1899868249893188, "logits/rejected": -1.0847022533416748, "logps/chosen": -1.3469818830490112, "logps/rejected": -1.8414311408996582, "loss": 2.4276, "nll_loss": 0.5618480443954468, "rewards/accuracies": 1.0, "rewards/chosen": -0.13469818234443665, "rewards/margins": 0.04944493994116783, "rewards/rejected": -0.18414312601089478, "step": 1088 }, { "epoch": 2.871456822676335, "grad_norm": 9.940681457519531, "learning_rate": 3.4072759538598044e-07, "log_odds_chosen": 0.6833094358444214, "log_odds_ratio": -0.42807725071907043, "logits/chosen": -1.134655237197876, "logits/rejected": -1.0335592031478882, "logps/chosen": -1.3730700016021729, "logps/rejected": -1.9044145345687866, "loss": 2.3156, "nll_loss": 0.5361028909683228, "rewards/accuracies": 0.875, "rewards/chosen": -0.13730700314044952, "rewards/margins": 0.05313445255160332, "rewards/rejected": -0.19044145941734314, "step": 1089 }, { "epoch": 2.874093605800923, "grad_norm": 9.627927780151367, "learning_rate": 3.336291038154392e-07, "log_odds_chosen": 0.5087884664535522, "log_odds_ratio": -0.4863870143890381, "logits/chosen": -1.1259316205978394, "logits/rejected": -1.076722502708435, "logps/chosen": -1.1586689949035645, "logps/rejected": -1.541443109512329, "loss": 1.8021, "nll_loss": 0.40189826488494873, "rewards/accuracies": 1.0, "rewards/chosen": -0.11586690694093704, "rewards/margins": 0.03827742487192154, "rewards/rejected": -0.15414433181285858, "step": 1090 }, { "epoch": 2.8767303889255107, "grad_norm": 10.469525337219238, "learning_rate": 3.265306122448979e-07, "log_odds_chosen": 0.6726866364479065, "log_odds_ratio": -0.4167555570602417, "logits/chosen": -1.1142288446426392, "logits/rejected": -1.0072951316833496, "logps/chosen": -1.269399642944336, "logps/rejected": -1.794497013092041, "loss": 2.2729, "nll_loss": 0.5265529751777649, "rewards/accuracies": 1.0, "rewards/chosen": -0.12693995237350464, "rewards/margins": 0.05250975862145424, "rewards/rejected": -0.17944972217082977, "step": 1091 }, { "epoch": 2.879367172050099, "grad_norm": 10.261611938476562, "learning_rate": 3.194321206743567e-07, "log_odds_chosen": 0.3666892647743225, "log_odds_ratio": -0.5469551086425781, "logits/chosen": -1.1116154193878174, "logits/rejected": -1.0619001388549805, "logps/chosen": -1.2703733444213867, "logps/rejected": -1.5594353675842285, "loss": 2.1767, "nll_loss": 0.4894874095916748, "rewards/accuracies": 0.75, "rewards/chosen": -0.12703733146190643, "rewards/margins": 0.028906218707561493, "rewards/rejected": -0.15594354271888733, "step": 1092 }, { "epoch": 2.882003955174687, "grad_norm": 9.441848754882812, "learning_rate": 3.1233362910381546e-07, "log_odds_chosen": 0.5594149231910706, "log_odds_ratio": -0.48090699315071106, "logits/chosen": -1.0834064483642578, "logits/rejected": -1.0091333389282227, "logps/chosen": -1.3397884368896484, "logps/rejected": -1.7731108665466309, "loss": 2.0474, "nll_loss": 0.46376368403434753, "rewards/accuracies": 0.875, "rewards/chosen": -0.13397884368896484, "rewards/margins": 0.043332237750291824, "rewards/rejected": -0.17731109261512756, "step": 1093 }, { "epoch": 2.884640738299275, "grad_norm": 10.408712387084961, "learning_rate": 3.052351375332742e-07, "log_odds_chosen": 0.42781007289886475, "log_odds_ratio": -0.5116385817527771, "logits/chosen": -1.1213773488998413, "logits/rejected": -1.0789302587509155, "logps/chosen": -1.3997325897216797, "logps/rejected": -1.7490270137786865, "loss": 2.2369, "nll_loss": 0.5080611705780029, "rewards/accuracies": 1.0, "rewards/chosen": -0.1399732530117035, "rewards/margins": 0.03492945432662964, "rewards/rejected": -0.17490270733833313, "step": 1094 }, { "epoch": 2.887277521423863, "grad_norm": 9.774060249328613, "learning_rate": 2.9813664596273294e-07, "log_odds_chosen": 0.7794356346130371, "log_odds_ratio": -0.3976297378540039, "logits/chosen": -1.1235860586166382, "logits/rejected": -1.064821720123291, "logps/chosen": -1.1625523567199707, "logps/rejected": -1.7622184753417969, "loss": 1.855, "nll_loss": 0.4239905774593353, "rewards/accuracies": 0.875, "rewards/chosen": -0.11625523865222931, "rewards/margins": 0.05996660143136978, "rewards/rejected": -0.1762218326330185, "step": 1095 }, { "epoch": 2.889914304548451, "grad_norm": 10.065715789794922, "learning_rate": 2.910381543921916e-07, "log_odds_chosen": 0.6282092928886414, "log_odds_ratio": -0.4407644271850586, "logits/chosen": -1.1285327672958374, "logits/rejected": -1.012041449546814, "logps/chosen": -1.172711968421936, "logps/rejected": -1.6262803077697754, "loss": 2.2328, "nll_loss": 0.5141250491142273, "rewards/accuracies": 1.0, "rewards/chosen": -0.11727119982242584, "rewards/margins": 0.04535682499408722, "rewards/rejected": -0.16262802481651306, "step": 1096 }, { "epoch": 2.8925510876730387, "grad_norm": 11.159720420837402, "learning_rate": 2.8393966282165037e-07, "log_odds_chosen": 0.35956841707229614, "log_odds_ratio": -0.5396775007247925, "logits/chosen": -1.1792635917663574, "logits/rejected": -1.032005786895752, "logps/chosen": -1.3383150100708008, "logps/rejected": -1.6125526428222656, "loss": 2.9255, "nll_loss": 0.6774047613143921, "rewards/accuracies": 0.875, "rewards/chosen": -0.13383150100708008, "rewards/margins": 0.027423767372965813, "rewards/rejected": -0.16125527024269104, "step": 1097 }, { "epoch": 2.895187870797627, "grad_norm": 11.200017929077148, "learning_rate": 2.768411712511091e-07, "log_odds_chosen": 0.47986552119255066, "log_odds_ratio": -0.49582991003990173, "logits/chosen": -1.0967844724655151, "logits/rejected": -1.026170253753662, "logps/chosen": -1.4546232223510742, "logps/rejected": -1.823494791984558, "loss": 2.9436, "nll_loss": 0.6863148808479309, "rewards/accuracies": 0.75, "rewards/chosen": -0.14546233415603638, "rewards/margins": 0.03688715398311615, "rewards/rejected": -0.18234948813915253, "step": 1098 }, { "epoch": 2.897824653922215, "grad_norm": 11.039349555969238, "learning_rate": 2.6974267968056785e-07, "log_odds_chosen": 0.4787980020046234, "log_odds_ratio": -0.4917075037956238, "logits/chosen": -1.0969046354293823, "logits/rejected": -1.0258355140686035, "logps/chosen": -1.362617015838623, "logps/rejected": -1.7399983406066895, "loss": 2.0708, "nll_loss": 0.4685228765010834, "rewards/accuracies": 1.0, "rewards/chosen": -0.1362617015838623, "rewards/margins": 0.0377381332218647, "rewards/rejected": -0.1739998459815979, "step": 1099 }, { "epoch": 2.900461437046803, "grad_norm": 10.629711151123047, "learning_rate": 2.626441881100266e-07, "log_odds_chosen": 0.38331663608551025, "log_odds_ratio": -0.5311778783798218, "logits/chosen": -1.2185980081558228, "logits/rejected": -1.1109384298324585, "logps/chosen": -1.3727366924285889, "logps/rejected": -1.677282691001892, "loss": 2.5856, "nll_loss": 0.5932785272598267, "rewards/accuracies": 0.875, "rewards/chosen": -0.1372736692428589, "rewards/margins": 0.03045460395514965, "rewards/rejected": -0.1677282750606537, "step": 1100 }, { "epoch": 2.903098220171391, "grad_norm": 10.82974910736084, "learning_rate": 2.5554569653948533e-07, "log_odds_chosen": 0.7012743353843689, "log_odds_ratio": -0.48878979682922363, "logits/chosen": -1.1110811233520508, "logits/rejected": -1.0399723052978516, "logps/chosen": -1.218574047088623, "logps/rejected": -1.8044946193695068, "loss": 2.571, "nll_loss": 0.5938761830329895, "rewards/accuracies": 0.875, "rewards/chosen": -0.1218574047088623, "rewards/margins": 0.058592043817043304, "rewards/rejected": -0.180449441075325, "step": 1101 }, { "epoch": 2.905735003295979, "grad_norm": 9.74654483795166, "learning_rate": 2.4844720496894407e-07, "log_odds_chosen": 0.4575854539871216, "log_odds_ratio": -0.49557802081108093, "logits/chosen": -1.0428321361541748, "logits/rejected": -0.9787229299545288, "logps/chosen": -1.2805407047271729, "logps/rejected": -1.61838960647583, "loss": 2.0251, "nll_loss": 0.45671436190605164, "rewards/accuracies": 1.0, "rewards/chosen": -0.12805408239364624, "rewards/margins": 0.033784881234169006, "rewards/rejected": -0.16183894872665405, "step": 1102 }, { "epoch": 2.9083717864205667, "grad_norm": 10.815143585205078, "learning_rate": 2.413487133984028e-07, "log_odds_chosen": 0.3740006983280182, "log_odds_ratio": -0.5430451035499573, "logits/chosen": -1.1571646928787231, "logits/rejected": -1.0375688076019287, "logps/chosen": -1.4973931312561035, "logps/rejected": -1.8008571863174438, "loss": 2.8253, "nll_loss": 0.652025043964386, "rewards/accuracies": 0.75, "rewards/chosen": -0.1497393250465393, "rewards/margins": 0.030346402898430824, "rewards/rejected": -0.18008573353290558, "step": 1103 }, { "epoch": 2.911008569545155, "grad_norm": 10.645405769348145, "learning_rate": 2.3425022182786155e-07, "log_odds_chosen": 0.49522343277931213, "log_odds_ratio": -0.4888048470020294, "logits/chosen": -1.2563395500183105, "logits/rejected": -1.1153737306594849, "logps/chosen": -1.2137320041656494, "logps/rejected": -1.5825142860412598, "loss": 2.5768, "nll_loss": 0.5953096747398376, "rewards/accuracies": 1.0, "rewards/chosen": -0.12137319892644882, "rewards/margins": 0.036878228187561035, "rewards/rejected": -0.15825143456459045, "step": 1104 }, { "epoch": 2.913645352669743, "grad_norm": 9.802302360534668, "learning_rate": 2.2715173025732032e-07, "log_odds_chosen": 0.7142266035079956, "log_odds_ratio": -0.4076310098171234, "logits/chosen": -1.095991611480713, "logits/rejected": -0.9405829906463623, "logps/chosen": -1.2853018045425415, "logps/rejected": -1.8538234233856201, "loss": 2.0316, "nll_loss": 0.46713969111442566, "rewards/accuracies": 1.0, "rewards/chosen": -0.12853017449378967, "rewards/margins": 0.05685216188430786, "rewards/rejected": -0.18538233637809753, "step": 1105 }, { "epoch": 2.916282135794331, "grad_norm": 10.226502418518066, "learning_rate": 2.2005323868677906e-07, "log_odds_chosen": 0.44650131464004517, "log_odds_ratio": -0.49772655963897705, "logits/chosen": -1.0860422849655151, "logits/rejected": -1.0122694969177246, "logps/chosen": -1.528808832168579, "logps/rejected": -1.8934898376464844, "loss": 2.2996, "nll_loss": 0.5251332521438599, "rewards/accuracies": 1.0, "rewards/chosen": -0.15288087725639343, "rewards/margins": 0.03646809235215187, "rewards/rejected": -0.1893489807844162, "step": 1106 }, { "epoch": 2.918918918918919, "grad_norm": 11.037799835205078, "learning_rate": 2.1295474711623777e-07, "log_odds_chosen": 0.36173897981643677, "log_odds_ratio": -0.5456938147544861, "logits/chosen": -1.2418875694274902, "logits/rejected": -1.1130543947219849, "logps/chosen": -1.4015865325927734, "logps/rejected": -1.6816915273666382, "loss": 2.9973, "nll_loss": 0.6947578191757202, "rewards/accuracies": 0.875, "rewards/chosen": -0.14015865325927734, "rewards/margins": 0.028010498732328415, "rewards/rejected": -0.16816917061805725, "step": 1107 }, { "epoch": 2.921555702043507, "grad_norm": 9.955268859863281, "learning_rate": 2.0585625554569652e-07, "log_odds_chosen": 0.5130655169487, "log_odds_ratio": -0.4895630180835724, "logits/chosen": -1.1983622312545776, "logits/rejected": -1.0804213285446167, "logps/chosen": -1.347536563873291, "logps/rejected": -1.7593469619750977, "loss": 2.4842, "nll_loss": 0.5720826983451843, "rewards/accuracies": 1.0, "rewards/chosen": -0.13475365936756134, "rewards/margins": 0.041181016713380814, "rewards/rejected": -0.17593468725681305, "step": 1108 }, { "epoch": 2.9241924851680947, "grad_norm": 11.561691284179688, "learning_rate": 1.9875776397515526e-07, "log_odds_chosen": 0.33087798953056335, "log_odds_ratio": -0.5504778623580933, "logits/chosen": -1.0546035766601562, "logits/rejected": -0.987433135509491, "logps/chosen": -1.4973101615905762, "logps/rejected": -1.7580746412277222, "loss": 2.6428, "nll_loss": 0.6056430339813232, "rewards/accuracies": 0.75, "rewards/chosen": -0.14973101019859314, "rewards/margins": 0.026076439768075943, "rewards/rejected": -0.17580744624137878, "step": 1109 }, { "epoch": 2.926829268292683, "grad_norm": 9.803023338317871, "learning_rate": 1.9165927240461402e-07, "log_odds_chosen": 0.6108183860778809, "log_odds_ratio": -0.4591115117073059, "logits/chosen": -1.1352243423461914, "logits/rejected": -1.1190838813781738, "logps/chosen": -1.2892515659332275, "logps/rejected": -1.7547543048858643, "loss": 2.2918, "nll_loss": 0.5270448327064514, "rewards/accuracies": 1.0, "rewards/chosen": -0.128925159573555, "rewards/margins": 0.04655028134584427, "rewards/rejected": -0.17547544836997986, "step": 1110 }, { "epoch": 2.929466051417271, "grad_norm": 10.088869094848633, "learning_rate": 1.8456078083407276e-07, "log_odds_chosen": 0.6438597440719604, "log_odds_ratio": -0.44443920254707336, "logits/chosen": -1.1769981384277344, "logits/rejected": -0.9903606176376343, "logps/chosen": -1.3865466117858887, "logps/rejected": -1.9032771587371826, "loss": 2.5475, "nll_loss": 0.592436671257019, "rewards/accuracies": 0.875, "rewards/chosen": -0.1386546492576599, "rewards/margins": 0.051673054695129395, "rewards/rejected": -0.1903277039527893, "step": 1111 }, { "epoch": 2.932102834541859, "grad_norm": 10.34731388092041, "learning_rate": 1.7746228926353148e-07, "log_odds_chosen": 0.8123482465744019, "log_odds_ratio": -0.3837769627571106, "logits/chosen": -1.1974098682403564, "logits/rejected": -1.0578560829162598, "logps/chosen": -1.285667061805725, "logps/rejected": -1.8774243593215942, "loss": 2.5631, "nll_loss": 0.6024080514907837, "rewards/accuracies": 1.0, "rewards/chosen": -0.128566712141037, "rewards/margins": 0.05917573720216751, "rewards/rejected": -0.1877424567937851, "step": 1112 }, { "epoch": 2.934739617666447, "grad_norm": 9.824599266052246, "learning_rate": 1.7036379769299022e-07, "log_odds_chosen": 0.6928430199623108, "log_odds_ratio": -0.4147317409515381, "logits/chosen": -1.1532843112945557, "logits/rejected": -1.0601625442504883, "logps/chosen": -1.162044644355774, "logps/rejected": -1.6832411289215088, "loss": 1.9871, "nll_loss": 0.455294668674469, "rewards/accuracies": 1.0, "rewards/chosen": -0.11620447039604187, "rewards/margins": 0.05211963877081871, "rewards/rejected": -0.16832411289215088, "step": 1113 }, { "epoch": 2.937376400791035, "grad_norm": 10.522456169128418, "learning_rate": 1.6326530612244896e-07, "log_odds_chosen": 0.3960050940513611, "log_odds_ratio": -0.5374730229377747, "logits/chosen": -1.1554762125015259, "logits/rejected": -1.0569953918457031, "logps/chosen": -1.5269532203674316, "logps/rejected": -1.844236135482788, "loss": 2.5369, "nll_loss": 0.5804662704467773, "rewards/accuracies": 0.75, "rewards/chosen": -0.15269532799720764, "rewards/margins": 0.03172829747200012, "rewards/rejected": -0.18442362546920776, "step": 1114 }, { "epoch": 2.9400131839156227, "grad_norm": 9.986109733581543, "learning_rate": 1.5616681455190773e-07, "log_odds_chosen": 0.7218302488327026, "log_odds_ratio": -0.4211685359477997, "logits/chosen": -1.066129207611084, "logits/rejected": -0.9383447766304016, "logps/chosen": -1.347313642501831, "logps/rejected": -1.9073268175125122, "loss": 2.5148, "nll_loss": 0.5865715742111206, "rewards/accuracies": 0.875, "rewards/chosen": -0.13473136723041534, "rewards/margins": 0.05600131303071976, "rewards/rejected": -0.1907326877117157, "step": 1115 }, { "epoch": 2.942649967040211, "grad_norm": 10.249882698059082, "learning_rate": 1.4906832298136647e-07, "log_odds_chosen": 0.5400087237358093, "log_odds_ratio": -0.46870461106300354, "logits/chosen": -1.1922576427459717, "logits/rejected": -1.0959630012512207, "logps/chosen": -1.3472270965576172, "logps/rejected": -1.7658050060272217, "loss": 2.8924, "nll_loss": 0.6762197017669678, "rewards/accuracies": 1.0, "rewards/chosen": -0.13472270965576172, "rewards/margins": 0.041857779026031494, "rewards/rejected": -0.1765804886817932, "step": 1116 }, { "epoch": 2.945286750164799, "grad_norm": 10.107986450195312, "learning_rate": 1.4196983141082518e-07, "log_odds_chosen": 0.6772925853729248, "log_odds_ratio": -0.4206685423851013, "logits/chosen": -1.0564854145050049, "logits/rejected": -1.0141981840133667, "logps/chosen": -1.4403352737426758, "logps/rejected": -1.99358069896698, "loss": 2.1219, "nll_loss": 0.4884171485900879, "rewards/accuracies": 1.0, "rewards/chosen": -0.1440335214138031, "rewards/margins": 0.05532454699277878, "rewards/rejected": -0.19935807585716248, "step": 1117 }, { "epoch": 2.947923533289387, "grad_norm": 10.683755874633789, "learning_rate": 1.3487133984028392e-07, "log_odds_chosen": 0.5642454624176025, "log_odds_ratio": -0.4553692638874054, "logits/chosen": -1.1627285480499268, "logits/rejected": -1.0698070526123047, "logps/chosen": -1.3500248193740845, "logps/rejected": -1.7949421405792236, "loss": 2.4335, "nll_loss": 0.5628418922424316, "rewards/accuracies": 1.0, "rewards/chosen": -0.1350024789571762, "rewards/margins": 0.044491734355688095, "rewards/rejected": -0.1794942021369934, "step": 1118 }, { "epoch": 2.950560316413975, "grad_norm": 10.497546195983887, "learning_rate": 1.2777284826974266e-07, "log_odds_chosen": 0.6890730261802673, "log_odds_ratio": -0.41999584436416626, "logits/chosen": -1.1632972955703735, "logits/rejected": -1.0566202402114868, "logps/chosen": -1.183455228805542, "logps/rejected": -1.707749366760254, "loss": 2.0868, "nll_loss": 0.47970157861709595, "rewards/accuracies": 1.0, "rewards/chosen": -0.11834551393985748, "rewards/margins": 0.052429430186748505, "rewards/rejected": -0.1707749366760254, "step": 1119 }, { "epoch": 2.953197099538563, "grad_norm": 9.32829761505127, "learning_rate": 1.206743566992014e-07, "log_odds_chosen": 0.49325263500213623, "log_odds_ratio": -0.4847280979156494, "logits/chosen": -1.024897575378418, "logits/rejected": -0.9912652969360352, "logps/chosen": -1.1244151592254639, "logps/rejected": -1.4825202226638794, "loss": 1.5474, "nll_loss": 0.338373064994812, "rewards/accuracies": 1.0, "rewards/chosen": -0.1124415248632431, "rewards/margins": 0.03581049665808678, "rewards/rejected": -0.14825202524662018, "step": 1120 }, { "epoch": 2.9558338826631507, "grad_norm": 9.516386985778809, "learning_rate": 1.1357586512866016e-07, "log_odds_chosen": 0.7329659461975098, "log_odds_ratio": -0.41048258543014526, "logits/chosen": -1.1054126024246216, "logits/rejected": -1.0205689668655396, "logps/chosen": -1.1692774295806885, "logps/rejected": -1.732006549835205, "loss": 1.8455, "nll_loss": 0.42032352089881897, "rewards/accuracies": 1.0, "rewards/chosen": -0.11692775040864944, "rewards/margins": 0.056272901594638824, "rewards/rejected": -0.17320065200328827, "step": 1121 }, { "epoch": 2.958470665787739, "grad_norm": 10.708715438842773, "learning_rate": 1.0647737355811889e-07, "log_odds_chosen": 0.40624159574508667, "log_odds_ratio": -0.5160567760467529, "logits/chosen": -1.0800803899765015, "logits/rejected": -1.0116575956344604, "logps/chosen": -1.4815709590911865, "logps/rejected": -1.7999603748321533, "loss": 2.5454, "nll_loss": 0.5847477912902832, "rewards/accuracies": 0.875, "rewards/chosen": -0.14815710484981537, "rewards/margins": 0.031838931143283844, "rewards/rejected": -0.1799960434436798, "step": 1122 }, { "epoch": 2.961107448912327, "grad_norm": 9.641554832458496, "learning_rate": 9.937888198757763e-08, "log_odds_chosen": 0.8635714650154114, "log_odds_ratio": -0.38064178824424744, "logits/chosen": -1.1759165525436401, "logits/rejected": -1.0470998287200928, "logps/chosen": -1.2402002811431885, "logps/rejected": -1.936885118484497, "loss": 2.0752, "nll_loss": 0.48072531819343567, "rewards/accuracies": 1.0, "rewards/chosen": -0.1240200400352478, "rewards/margins": 0.0696684867143631, "rewards/rejected": -0.1936885118484497, "step": 1123 }, { "epoch": 2.963744232036915, "grad_norm": 10.646029472351074, "learning_rate": 9.228039041703638e-08, "log_odds_chosen": 0.4761095345020294, "log_odds_ratio": -0.499419629573822, "logits/chosen": -1.171363353729248, "logits/rejected": -1.0856765508651733, "logps/chosen": -1.3061269521713257, "logps/rejected": -1.6765310764312744, "loss": 2.4182, "nll_loss": 0.5546119213104248, "rewards/accuracies": 1.0, "rewards/chosen": -0.13061270117759705, "rewards/margins": 0.03704041987657547, "rewards/rejected": -0.1676531285047531, "step": 1124 }, { "epoch": 2.966381015161503, "grad_norm": 9.832589149475098, "learning_rate": 8.518189884649511e-08, "log_odds_chosen": 0.620135486125946, "log_odds_ratio": -0.44997426867485046, "logits/chosen": -1.1520901918411255, "logits/rejected": -1.0546834468841553, "logps/chosen": -1.2627184391021729, "logps/rejected": -1.7329437732696533, "loss": 2.2203, "nll_loss": 0.5100676417350769, "rewards/accuracies": 0.875, "rewards/chosen": -0.12627184391021729, "rewards/margins": 0.04702254384756088, "rewards/rejected": -0.17329436540603638, "step": 1125 }, { "epoch": 2.969017798286091, "grad_norm": 10.106729507446289, "learning_rate": 7.808340727595386e-08, "log_odds_chosen": 0.4591771364212036, "log_odds_ratio": -0.5125027894973755, "logits/chosen": -1.1476519107818604, "logits/rejected": -1.0820443630218506, "logps/chosen": -1.341275691986084, "logps/rejected": -1.709936499595642, "loss": 2.5364, "nll_loss": 0.5828558206558228, "rewards/accuracies": 0.875, "rewards/chosen": -0.13412757217884064, "rewards/margins": 0.03686608374118805, "rewards/rejected": -0.1709936410188675, "step": 1126 }, { "epoch": 2.9716545814106787, "grad_norm": 10.069293022155762, "learning_rate": 7.098491570541259e-08, "log_odds_chosen": 0.5392491221427917, "log_odds_ratio": -0.47593629360198975, "logits/chosen": -1.0493738651275635, "logits/rejected": -0.9915323257446289, "logps/chosen": -1.2978014945983887, "logps/rejected": -1.7215747833251953, "loss": 2.1687, "nll_loss": 0.49458765983581543, "rewards/accuracies": 0.875, "rewards/chosen": -0.12978015840053558, "rewards/margins": 0.04237733036279678, "rewards/rejected": -0.17215748131275177, "step": 1127 }, { "epoch": 2.974291364535267, "grad_norm": 9.97978687286377, "learning_rate": 6.388642413487133e-08, "log_odds_chosen": 0.6298450231552124, "log_odds_ratio": -0.4396544098854065, "logits/chosen": -1.1477969884872437, "logits/rejected": -0.9882994890213013, "logps/chosen": -1.461350440979004, "logps/rejected": -1.9838271141052246, "loss": 2.4329, "nll_loss": 0.5642586946487427, "rewards/accuracies": 1.0, "rewards/chosen": -0.14613503217697144, "rewards/margins": 0.052247680723667145, "rewards/rejected": -0.19838272035121918, "step": 1128 }, { "epoch": 2.976928147659855, "grad_norm": 10.508831977844238, "learning_rate": 5.678793256433008e-08, "log_odds_chosen": 0.25923722982406616, "log_odds_ratio": -0.5890034437179565, "logits/chosen": -1.1681803464889526, "logits/rejected": -1.0757927894592285, "logps/chosen": -1.3958204984664917, "logps/rejected": -1.6009249687194824, "loss": 2.7438, "nll_loss": 0.6270577311515808, "rewards/accuracies": 0.875, "rewards/chosen": -0.1395820528268814, "rewards/margins": 0.020510446280241013, "rewards/rejected": -0.16009250283241272, "step": 1129 }, { "epoch": 2.979564930784443, "grad_norm": 10.465526580810547, "learning_rate": 4.9689440993788814e-08, "log_odds_chosen": 0.502131998538971, "log_odds_ratio": -0.48989468812942505, "logits/chosen": -1.0431938171386719, "logits/rejected": -0.9729862213134766, "logps/chosen": -1.2752975225448608, "logps/rejected": -1.6618212461471558, "loss": 1.6744, "nll_loss": 0.3696220815181732, "rewards/accuracies": 1.0, "rewards/chosen": -0.12752975523471832, "rewards/margins": 0.03865238279104233, "rewards/rejected": -0.16618213057518005, "step": 1130 }, { "epoch": 2.982201713909031, "grad_norm": 10.241425514221191, "learning_rate": 4.2590949423247555e-08, "log_odds_chosen": 0.5095721483230591, "log_odds_ratio": -0.4832859933376312, "logits/chosen": -1.03834867477417, "logits/rejected": -0.9979722499847412, "logps/chosen": -1.2013146877288818, "logps/rejected": -1.5752215385437012, "loss": 2.0638, "nll_loss": 0.46762895584106445, "rewards/accuracies": 0.875, "rewards/chosen": -0.1201314628124237, "rewards/margins": 0.037390682846307755, "rewards/rejected": -0.15752214193344116, "step": 1131 }, { "epoch": 2.984838497033619, "grad_norm": 9.945733070373535, "learning_rate": 3.5492457852706296e-08, "log_odds_chosen": 0.5073897242546082, "log_odds_ratio": -0.48286592960357666, "logits/chosen": -1.1669549942016602, "logits/rejected": -1.0747926235198975, "logps/chosen": -1.213772177696228, "logps/rejected": -1.6001689434051514, "loss": 2.2927, "nll_loss": 0.5248969197273254, "rewards/accuracies": 0.875, "rewards/chosen": -0.12137721478939056, "rewards/margins": 0.03863967955112457, "rewards/rejected": -0.16001689434051514, "step": 1132 }, { "epoch": 2.9874752801582067, "grad_norm": 10.28751277923584, "learning_rate": 2.839396628216504e-08, "log_odds_chosen": 0.5915455222129822, "log_odds_ratio": -0.44667544960975647, "logits/chosen": -1.124068021774292, "logits/rejected": -0.9785863161087036, "logps/chosen": -1.3051058053970337, "logps/rejected": -1.7666263580322266, "loss": 2.0903, "nll_loss": 0.477913498878479, "rewards/accuracies": 1.0, "rewards/chosen": -0.1305105835199356, "rewards/margins": 0.046152062714099884, "rewards/rejected": -0.1766626536846161, "step": 1133 }, { "epoch": 2.990112063282795, "grad_norm": 10.091132164001465, "learning_rate": 2.1295474711623777e-08, "log_odds_chosen": 0.5201779007911682, "log_odds_ratio": -0.4749506711959839, "logits/chosen": -1.118676781654358, "logits/rejected": -1.0187113285064697, "logps/chosen": -1.2404669523239136, "logps/rejected": -1.6390795707702637, "loss": 2.0141, "nll_loss": 0.4560197591781616, "rewards/accuracies": 1.0, "rewards/chosen": -0.1240466982126236, "rewards/margins": 0.03986125811934471, "rewards/rejected": -0.1639079451560974, "step": 1134 }, { "epoch": 2.992748846407383, "grad_norm": 11.591419219970703, "learning_rate": 1.419698314108252e-08, "log_odds_chosen": 0.4546339809894562, "log_odds_ratio": -0.5016353726387024, "logits/chosen": -1.183355450630188, "logits/rejected": -1.048933744430542, "logps/chosen": -1.4572858810424805, "logps/rejected": -1.8160556554794312, "loss": 3.2187, "nll_loss": 0.7545139193534851, "rewards/accuracies": 1.0, "rewards/chosen": -0.14572857320308685, "rewards/margins": 0.03587697818875313, "rewards/rejected": -0.18160554766654968, "step": 1135 }, { "epoch": 2.995385629531971, "grad_norm": 10.501843452453613, "learning_rate": 7.09849157054126e-09, "log_odds_chosen": 0.46021127700805664, "log_odds_ratio": -0.558140754699707, "logits/chosen": -1.1137306690216064, "logits/rejected": -0.9876527190208435, "logps/chosen": -1.424965500831604, "logps/rejected": -1.8375980854034424, "loss": 2.3125, "nll_loss": 0.5223027467727661, "rewards/accuracies": 0.625, "rewards/chosen": -0.14249655604362488, "rewards/margins": 0.041263267397880554, "rewards/rejected": -0.18375982344150543, "step": 1136 }, { "epoch": 2.998022412656559, "grad_norm": 10.061162948608398, "learning_rate": 0.0, "log_odds_chosen": 0.4221417009830475, "log_odds_ratio": -0.5208521485328674, "logits/chosen": -1.1026612520217896, "logits/rejected": -1.025718331336975, "logps/chosen": -1.3111175298690796, "logps/rejected": -1.620469570159912, "loss": 2.165, "nll_loss": 0.48915278911590576, "rewards/accuracies": 0.875, "rewards/chosen": -0.1311117708683014, "rewards/margins": 0.030935190618038177, "rewards/rejected": -0.16204693913459778, "step": 1137 } ], "logging_steps": 1, "max_steps": 1137, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }